Index: head/share/man/man9/Makefile =================================================================== --- head/share/man/man9/Makefile (revision 350693) +++ head/share/man/man9/Makefile (revision 350694) @@ -1,2294 +1,2295 @@ # $FreeBSD$ .include MAN= accept_filter.9 \ accf_data.9 \ accf_dns.9 \ accf_http.9 \ acl.9 \ alq.9 \ altq.9 \ atomic.9 \ bhnd.9 \ bhnd_erom.9 \ bios.9 \ bitset.9 \ boot.9 \ bpf.9 \ buf.9 \ buf_ring.9 \ BUF_ISLOCKED.9 \ BUF_LOCK.9 \ BUF_LOCKFREE.9 \ BUF_LOCKINIT.9 \ BUF_RECURSED.9 \ BUF_TIMELOCK.9 \ BUF_UNLOCK.9 \ bus_activate_resource.9 \ BUS_ADD_CHILD.9 \ bus_adjust_resource.9 \ bus_alloc_resource.9 \ BUS_BIND_INTR.9 \ bus_child_present.9 \ BUS_CHILD_DELETED.9 \ BUS_CHILD_DETACHED.9 \ BUS_CONFIG_INTR.9 \ BUS_DESCRIBE_INTR.9 \ bus_dma.9 \ bus_generic_attach.9 \ bus_generic_detach.9 \ bus_generic_new_pass.9 \ bus_generic_print_child.9 \ bus_generic_read_ivar.9 \ bus_generic_shutdown.9 \ BUS_GET_CPUS.9 \ bus_get_resource.9 \ bus_map_resource.9 \ BUS_NEW_PASS.9 \ BUS_PRINT_CHILD.9 \ BUS_READ_IVAR.9 \ BUS_RESCAN.9 \ bus_release_resource.9 \ bus_set_pass.9 \ bus_set_resource.9 \ BUS_SETUP_INTR.9 \ bus_space.9 \ byteorder.9 \ casuword.9 \ cd.9 \ cnv.9 \ condvar.9 \ config_intrhook.9 \ contigmalloc.9 \ copy.9 \ counter.9 \ cpuset.9 \ cr_cansee.9 \ critical_enter.9 \ cr_seeothergids.9 \ cr_seeotheruids.9 \ crypto.9 \ CTASSERT.9 \ DB_COMMAND.9 \ DECLARE_GEOM_CLASS.9 \ DECLARE_MODULE.9 \ DEFINE_IFUNC.9 \ DELAY.9 \ devclass.9 \ devclass_find.9 \ devclass_get_device.9 \ devclass_get_devices.9 \ devclass_get_drivers.9 \ devclass_get_maxunit.9 \ devclass_get_name.9 \ devclass_get_softc.9 \ dev_clone.9 \ devfs_set_cdevpriv.9 \ device.9 \ device_add_child.9 \ DEVICE_ATTACH.9 \ device_delete_child.9 \ device_delete_children.9 \ DEVICE_DETACH.9 \ device_enable.9 \ device_find_child.9 \ device_get_children.9 \ device_get_devclass.9 \ device_get_driver.9 \ device_get_ivars.9 \ device_get_name.9 \ device_get_parent.9 \ device_get_softc.9 \ device_get_state.9 \ device_get_sysctl.9 \ device_get_unit.9 \ DEVICE_IDENTIFY.9 \ device_printf.9 \ DEVICE_PROBE.9 \ device_probe_and_attach.9 \ device_quiet.9 \ device_set_desc.9 \ device_set_driver.9 \ device_set_flags.9 \ DEVICE_SHUTDOWN.9 \ DEV_MODULE.9 \ dev_refthread.9 \ devstat.9 \ devtoname.9 \ disk.9 \ dnv.9 \ domain.9 \ domainset.9 \ dpcpu.9 \ drbr.9 \ driver.9 \ DRIVER_MODULE.9 \ efirt.9 \ epoch.9 \ EVENTHANDLER.9 \ eventtimers.9 \ extattr.9 \ fail.9 \ fdt_pinctrl.9 \ fetch.9 \ firmware.9 \ fpu_kern.9 \ g_access.9 \ g_attach.9 \ g_bio.9 \ g_consumer.9 \ g_data.9 \ get_cyclecount.9 \ getenv.9 \ getnewvnode.9 \ g_event.9 \ g_geom.9 \ g_provider.9 \ g_provider_by_name.9 \ groupmember.9 \ g_wither_geom.9 \ hash.9 \ hashinit.9 \ hexdump.9 \ hhook.9 \ ieee80211.9 \ ieee80211_amrr.9 \ ieee80211_beacon.9 \ ieee80211_bmiss.9 \ ieee80211_crypto.9 \ ieee80211_ddb.9 \ ieee80211_input.9 \ ieee80211_node.9 \ ieee80211_output.9 \ ieee80211_proto.9 \ ieee80211_radiotap.9 \ ieee80211_regdomain.9 \ ieee80211_scan.9 \ ieee80211_vap.9 \ iflib.9 \ iflibdd.9 \ iflibdi.9 \ iflibtxrx.9 \ ifnet.9 \ inittodr.9 \ insmntque.9 \ intro.9 \ ithread.9 \ KASSERT.9 \ kern_testfrwk.9 \ kernacc.9 \ kernel_mount.9 \ khelp.9 \ kobj.9 \ kproc.9 \ kqueue.9 \ kthread.9 \ ktr.9 \ lock.9 \ locking.9 \ LOCK_PROFILING.9 \ mac.9 \ make_dev.9 \ malloc.9 \ mbchain.9 \ mbuf.9 \ mbuf_tags.9 \ MD5.9 \ mdchain.9 \ memcchr.9 \ memguard.9 \ microseq.9 \ microtime.9 \ microuptime.9 \ mi_switch.9 \ mod_cc.9 \ module.9 \ MODULE_DEPEND.9 \ MODULE_PNP_INFO.9 \ MODULE_VERSION.9 \ mtx_pool.9 \ 
mutex.9 \ namei.9 \ netisr.9 \ nv.9 \ OF_child.9 \ OF_device_from_xref.9 \ OF_finddevice.9 \ OF_getprop.9 \ OF_node_from_xref.9 \ OF_package_to_path.9 \ ofw_bus_is_compatible.9 \ ofw_bus_status_okay.9 \ osd.9 \ owll.9 \ own.9 \ panic.9 \ PCBGROUP.9 \ p_candebug.9 \ p_cansee.9 \ pci.9 \ PCI_IOV_ADD_VF.9 \ PCI_IOV_INIT.9 \ pci_iov_schema.9 \ PCI_IOV_UNINIT.9 \ pfil.9 \ pfind.9 \ pget.9 \ pgfind.9 \ PHOLD.9 \ physio.9 \ pmap.9 \ pmap_activate.9 \ pmap_clear_modify.9 \ pmap_copy.9 \ pmap_enter.9 \ pmap_extract.9 \ pmap_growkernel.9 \ pmap_init.9 \ pmap_is_modified.9 \ pmap_is_prefaultable.9 \ pmap_map.9 \ pmap_mincore.9 \ pmap_object_init_pt.9 \ pmap_page_exists_quick.9 \ pmap_page_init.9 \ pmap_pinit.9 \ pmap_protect.9 \ pmap_qenter.9 \ pmap_quick_enter_page.9 \ pmap_release.9 \ pmap_remove.9 \ pmap_resident_count.9 \ pmap_unwire.9 \ pmap_zero_page.9 \ printf.9 \ prison_check.9 \ priv.9 \ proc_rwmem.9 \ pseudofs.9 \ psignal.9 \ pwmbus.9 \ random.9 \ random_harvest.9 \ ratecheck.9 \ redzone.9 \ refcount.9 \ resettodr.9 \ resource_int_value.9 \ rijndael.9 \ rman.9 \ rmlock.9 \ rtalloc.9 \ rtentry.9 \ runqueue.9 \ rwlock.9 \ sbuf.9 \ scheduler.9 \ SDT.9 \ securelevel_gt.9 \ selrecord.9 \ sema.9 \ seqc.9 \ sf_buf.9 \ sglist.9 \ shm_map.9 \ signal.9 \ sleep.9 \ sleepqueue.9 \ socket.9 \ stack.9 \ store.9 \ style.9 \ style.lua.9 \ swi.9 \ sx.9 \ syscall_helper_register.9 \ SYSCALL_MODULE.9 \ sysctl.9 \ sysctl_add_oid.9 \ sysctl_ctx_init.9 \ SYSINIT.9 \ taskqueue.9 \ tcp_functions.9 \ thread_exit.9 \ time.9 \ timeout.9 \ tvtohz.9 \ ucred.9 \ uidinfo.9 \ uio.9 \ unr.9 \ vaccess.9 \ vaccess_acl_nfs4.9 \ vaccess_acl_posix1e.9 \ vcount.9 \ vflush.9 \ VFS.9 \ vfs_busy.9 \ VFS_CHECKEXP.9 \ vfsconf.9 \ VFS_FHTOVP.9 \ vfs_getnewfsid.9 \ vfs_getopt.9 \ vfs_getvfs.9 \ VFS_MOUNT.9 \ vfs_mountedfrom.9 \ VFS_QUOTACTL.9 \ VFS_ROOT.9 \ vfs_rootmountalloc.9 \ VFS_SET.9 \ VFS_STATFS.9 \ vfs_suser.9 \ VFS_SYNC.9 \ vfs_timestamp.9 \ vfs_unbusy.9 \ VFS_UNMOUNT.9 \ vfs_unmountall.9 \ VFS_VGET.9 \ vget.9 \ vgone.9 \ vhold.9 \ vinvalbuf.9 \ vm_fault_prefault.9 \ vm_map.9 \ vm_map_check_protection.9 \ vm_map_create.9 \ vm_map_delete.9 \ vm_map_entry_resize_free.9 \ vm_map_find.9 \ vm_map_findspace.9 \ vm_map_inherit.9 \ vm_map_init.9 \ vm_map_insert.9 \ vm_map_lock.9 \ vm_map_lookup.9 \ vm_map_madvise.9 \ vm_map_max.9 \ vm_map_protect.9 \ vm_map_remove.9 \ vm_map_simplify_entry.9 \ vm_map_stack.9 \ vm_map_submap.9 \ vm_map_sync.9 \ vm_map_wire.9 \ vm_page_alloc.9 \ vm_page_bits.9 \ vm_page_busy.9 \ vm_page_deactivate.9 \ vm_page_dontneed.9 \ vm_page_aflag.9 \ vm_page_free.9 \ vm_page_grab.9 \ vm_page_insert.9 \ vm_page_lookup.9 \ vm_page_rename.9 \ vm_page_wire.9 \ vm_set_page_size.9 \ vmem.9 \ vn_fullpath.9 \ vn_isdisk.9 \ vnet.9 \ vnode.9 \ VOP_ACCESS.9 \ VOP_ACLCHECK.9 \ VOP_ADVISE.9 \ VOP_ADVLOCK.9 \ VOP_ALLOCATE.9 \ VOP_ATTRIB.9 \ VOP_BMAP.9 \ VOP_BWRITE.9 \ VOP_COPY_FILE_RANGE.9 \ VOP_CREATE.9 \ VOP_FSYNC.9 \ VOP_GETACL.9 \ VOP_GETEXTATTR.9 \ VOP_GETPAGES.9 \ VOP_INACTIVE.9 \ VOP_IOCTL.9 \ VOP_LINK.9 \ VOP_LISTEXTATTR.9 \ VOP_LOCK.9 \ VOP_LOOKUP.9 \ VOP_OPENCLOSE.9 \ VOP_PATHCONF.9 \ VOP_PRINT.9 \ VOP_RDWR.9 \ VOP_READDIR.9 \ VOP_READLINK.9 \ VOP_REALLOCBLKS.9 \ VOP_REMOVE.9 \ VOP_RENAME.9 \ VOP_REVOKE.9 \ VOP_SETACL.9 \ VOP_SETEXTATTR.9 \ VOP_STRATEGY.9 \ VOP_VPTOCNP.9 \ VOP_VPTOFH.9 \ vref.9 \ vrefcnt.9 \ vrele.9 \ vslock.9 \ watchdog.9 \ zone.9 MLINKS= unr.9 alloc_unr.9 \ unr.9 alloc_unrl.9 \ unr.9 alloc_unr_specific.9 \ unr.9 clear_unrhdr.9 \ unr.9 delete_unrhdr.9 \ unr.9 free_unr.9 \ unr.9 new_unrhdr.9 
MLINKS+=accept_filter.9 accept_filt_add.9 \ accept_filter.9 accept_filt_del.9 \ accept_filter.9 accept_filt_generic_mod_event.9 \ accept_filter.9 accept_filt_get.9 MLINKS+=alq.9 ALQ.9 \ alq.9 alq_close.9 \ alq.9 alq_flush.9 \ alq.9 alq_get.9 \ alq.9 alq_getn.9 \ alq.9 alq_open.9 \ alq.9 alq_open_flags.9 \ alq.9 alq_post.9 \ alq.9 alq_post_flags.9 \ alq.9 alq_write.9 \ alq.9 alq_writen.9 MLINKS+=altq.9 ALTQ.9 MLINKS+=atomic.9 atomic_add.9 \ atomic.9 atomic_clear.9 \ atomic.9 atomic_cmpset.9 \ atomic.9 atomic_fcmpset.9 \ atomic.9 atomic_fetchadd.9 \ atomic.9 atomic_load.9 \ atomic.9 atomic_readandclear.9 \ atomic.9 atomic_set.9 \ atomic.9 atomic_store.9 \ atomic.9 atomic_subtract.9 \ atomic.9 atomic_swap.9 \ atomic.9 atomic_testandclear.9 \ atomic.9 atomic_testandset.9 \ atomic.9 atomic_thread_fence.9 MLINKS+=bhnd.9 BHND_MATCH_BOARD_TYPE.9 \ bhnd.9 BHND_MATCH_BOARD_VENDOR.9 \ bhnd.9 BHND_MATCH_CHIP_ID.9 \ bhnd.9 BHND_MATCH_CHIP_PKG.9 \ bhnd.9 BHND_MATCH_CHIP_REV.9 \ bhnd.9 BHND_MATCH_CORE_ID.9 \ bhnd.9 BHND_MATCH_CORE_VENDOR.9 \ bhnd.9 bhnd_activate_resource.9 \ bhnd.9 bhnd_alloc_pmu.9 \ bhnd.9 bhnd_alloc_resource.9 \ bhnd.9 bhnd_alloc_resource_any.9 \ bhnd.9 bhnd_alloc_resources.9 \ bhnd.9 bhnd_board_matches.9 \ bhnd.9 bhnd_bus_match_child.9 \ bhnd.9 bhnd_bus_read_1.9 \ bhnd.9 bhnd_bus_read_2.9 \ bhnd.9 bhnd_bus_read_4.9 \ bhnd.9 bhnd_bus_read_stream_1.9 \ bhnd.9 bhnd_bus_read_stream_2.9 \ bhnd.9 bhnd_bus_read_stream_4.9 \ bhnd.9 bhnd_bus_write_1.9 \ bhnd.9 bhnd_bus_write_2.9 \ bhnd.9 bhnd_bus_write_4.9 \ bhnd.9 bhnd_bus_write_stream_1.9 \ bhnd.9 bhnd_bus_write_stream_2.9 \ bhnd.9 bhnd_bus_write_stream_4.9 \ bhnd.9 bhnd_chip_matches.9 \ bhnd.9 bhnd_core_class.9 \ bhnd.9 bhnd_core_get_match_desc.9 \ bhnd.9 bhnd_core_matches.9 \ bhnd.9 bhnd_core_name.9 \ bhnd.9 bhnd_cores_equal.9 \ bhnd.9 bhnd_deactivate_resource.9 \ bhnd.9 bhnd_decode_port_rid.9 \ bhnd.9 bhnd_deregister_provider.9 \ bhnd.9 bhnd_device_lookup.9 \ bhnd.9 bhnd_device_matches.9 \ bhnd.9 bhnd_device_quirks.9 \ bhnd.9 bhnd_driver_get_erom_class.9 \ bhnd.9 bhnd_enable_clocks.9 \ bhnd.9 bhnd_find_core_class.9 \ bhnd.9 bhnd_find_core_name.9 \ bhnd.9 bhnd_format_chip_id.9 \ bhnd.9 bhnd_get_attach_type.9 \ bhnd.9 bhnd_get_chipid.9 \ bhnd.9 bhnd_get_class.9 \ bhnd.9 bhnd_get_clock_freq.9 \ bhnd.9 bhnd_get_clock_latency.9 \ bhnd.9 bhnd_get_core_index.9 \ bhnd.9 bhnd_get_core_info.9 \ bhnd.9 bhnd_get_core_unit.9 \ bhnd.9 bhnd_get_device.9 \ bhnd.9 bhnd_get_device_name.9 \ bhnd.9 bhnd_get_dma_translation.9 \ bhnd.9 bhnd_get_hwrev.9 \ bhnd.9 bhnd_get_intr_count.9 \ bhnd.9 bhnd_get_intr_ivec.9 \ bhnd.9 bhnd_get_port_count.9 \ bhnd.9 bhnd_get_port_rid.9 \ bhnd.9 bhnd_get_region_addr.9 \ bhnd.9 bhnd_get_region_count.9 \ bhnd.9 bhnd_get_vendor.9 \ bhnd.9 bhnd_get_vendor_name.9 \ bhnd.9 bhnd_hwrev_matches.9 \ bhnd.9 bhnd_is_hw_suspended.9 \ bhnd.9 bhnd_is_region_valid.9 \ bhnd.9 bhnd_map_intr.9 \ bhnd.9 bhnd_match_core.9 \ bhnd.9 bhnd_nvram_getvar.9 \ bhnd.9 bhnd_nvram_getvar_array.9 \ bhnd.9 bhnd_nvram_getvar_int.9 \ bhnd.9 bhnd_nvram_getvar_int16.9 \ bhnd.9 bhnd_nvram_getvar_int32.9 \ bhnd.9 bhnd_nvram_getvar_int8.9 \ bhnd.9 bhnd_nvram_getvar_str.9 \ bhnd.9 bhnd_nvram_getvar_uint.9 \ bhnd.9 bhnd_nvram_getvar_uint16.9 \ bhnd.9 bhnd_nvram_getvar_uint32.9 \ bhnd.9 bhnd_nvram_getvar_uint8.9 \ bhnd.9 bhnd_nvram_string_array_next.9 \ bhnd.9 bhnd_read_board_info.9 \ bhnd.9 bhnd_read_config.9 \ bhnd.9 bhnd_read_ioctl.9 \ bhnd.9 bhnd_read_iost.9 \ bhnd.9 bhnd_register_provider.9 \ bhnd.9 bhnd_release_ext_rsrc.9 \ bhnd.9 bhnd_release_pmu.9 \ bhnd.9 
bhnd_release_provider.9 \ bhnd.9 bhnd_release_resource.9 \ bhnd.9 bhnd_release_resources.9 \ bhnd.9 bhnd_request_clock.9 \ bhnd.9 bhnd_request_ext_rsrc.9 \ bhnd.9 bhnd_reset_hw.9 \ bhnd.9 bhnd_retain_provider.9 \ bhnd.9 bhnd_set_custom_core_desc.9 \ bhnd.9 bhnd_set_default_core_desc.9 \ bhnd.9 bhnd_suspend_hw.9 \ bhnd.9 bhnd_unmap_intr.9 \ bhnd.9 bhnd_vendor_name.9 \ bhnd.9 bhnd_write_config.9 \ bhnd.9 bhnd_write_ioctl.9 MLINKS+=bhnd_erom.9 bhnd_erom_alloc.9 \ bhnd_erom.9 bhnd_erom_dump.9 \ bhnd_erom.9 bhnd_erom_fini_static.9 \ bhnd_erom.9 bhnd_erom_free.9 \ bhnd_erom.9 bhnd_erom_free_core_table.9 \ bhnd_erom.9 bhnd_erom_get_core_table.9 \ bhnd_erom.9 bhnd_erom_init_static.9 \ bhnd_erom.9 bhnd_erom_io.9 \ bhnd_erom.9 bhnd_erom_io_fini.9 \ bhnd_erom.9 bhnd_erom_io_map.9 \ bhnd_erom.9 bhnd_erom_io_read.9 \ bhnd_erom.9 bhnd_erom_iobus_init.9 \ bhnd_erom.9 bhnd_erom_iores_new.9 \ bhnd_erom.9 bhnd_erom_lookup_core.9 \ bhnd_erom.9 bhnd_erom_lookup_core_addr.9 \ bhnd_erom.9 bhnd_erom_probe.9 \ bhnd_erom.9 bhnd_erom_probe_driver_classes.9 MLINKS+=bitset.9 BITSET_DEFINE.9 \ bitset.9 BITSET_T_INITIALIZER.9 \ bitset.9 BITSET_FSET.9 \ bitset.9 BIT_CLR.9 \ bitset.9 BIT_COPY.9 \ bitset.9 BIT_ISSET.9 \ bitset.9 BIT_SET.9 \ bitset.9 BIT_ZERO.9 \ bitset.9 BIT_FILL.9 \ bitset.9 BIT_SETOF.9 \ bitset.9 BIT_EMPTY.9 \ bitset.9 BIT_ISFULLSET.9 \ bitset.9 BIT_FFS.9 \ bitset.9 BIT_COUNT.9 \ bitset.9 BIT_SUBSET.9 \ bitset.9 BIT_OVERLAP.9 \ bitset.9 BIT_CMP.9 \ bitset.9 BIT_OR.9 \ bitset.9 BIT_AND.9 \ bitset.9 BIT_NAND.9 \ bitset.9 BIT_CLR_ATOMIC.9 \ bitset.9 BIT_SET_ATOMIC.9 \ bitset.9 BIT_SET_ATOMIC_ACQ.9 \ bitset.9 BIT_AND_ATOMIC.9 \ bitset.9 BIT_OR_ATOMIC.9 \ bitset.9 BIT_COPY_STORE_REL.9 MLINKS+=bpf.9 bpfattach.9 \ bpf.9 bpfattach2.9 \ bpf.9 bpfdetach.9 \ bpf.9 bpf_filter.9 \ bpf.9 bpf_mtap.9 \ bpf.9 bpf_mtap2.9 \ bpf.9 bpf_tap.9 \ bpf.9 bpf_validate.9 MLINKS+=buf.9 bp.9 MLINKS+=buf_ring.9 buf_ring_alloc.9 \ buf_ring.9 buf_ring_free.9 \ buf_ring.9 buf_ring_enqueue.9 \ buf_ring.9 buf_ring_enqueue_bytes.9 \ buf_ring.9 buf_ring_dequeue_mc.9 \ buf_ring.9 buf_ring_dequeue_sc.9 \ buf_ring.9 buf_ring_count.9 \ buf_ring.9 buf_ring_empty.9 \ buf_ring.9 buf_ring_full.9 \ buf_ring.9 buf_ring_peek.9 MLINKS+=bus_activate_resource.9 bus_deactivate_resource.9 MLINKS+=bus_alloc_resource.9 bus_alloc_resource_any.9 MLINKS+=BUS_BIND_INTR.9 bus_bind_intr.9 MLINKS+=BUS_DESCRIBE_INTR.9 bus_describe_intr.9 MLINKS+=bus_dma.9 busdma.9 \ bus_dma.9 bus_dmamap_create.9 \ bus_dma.9 bus_dmamap_destroy.9 \ bus_dma.9 bus_dmamap_load.9 \ bus_dma.9 bus_dmamap_load_bio.9 \ bus_dma.9 bus_dmamap_load_ccb.9 \ bus_dma.9 bus_dmamap_load_mbuf.9 \ bus_dma.9 bus_dmamap_load_mbuf_sg.9 \ bus_dma.9 bus_dmamap_load_uio.9 \ bus_dma.9 bus_dmamap_sync.9 \ bus_dma.9 bus_dmamap_unload.9 \ bus_dma.9 bus_dmamem_alloc.9 \ bus_dma.9 bus_dmamem_free.9 \ bus_dma.9 bus_dma_tag_create.9 \ bus_dma.9 bus_dma_tag_destroy.9 MLINKS+=bus_generic_read_ivar.9 bus_generic_write_ivar.9 MLINKS+=BUS_GET_CPUS.9 bus_get_cpus.9 MLINKS+=bus_map_resource.9 bus_unmap_resource.9 \ bus_map_resource.9 resource_init_map_request.9 MLINKS+=BUS_READ_IVAR.9 BUS_WRITE_IVAR.9 MLINKS+=BUS_SETUP_INTR.9 bus_setup_intr.9 \ BUS_SETUP_INTR.9 BUS_TEARDOWN_INTR.9 \ BUS_SETUP_INTR.9 bus_teardown_intr.9 MLINKS+=bus_space.9 bus_space_alloc.9 \ bus_space.9 bus_space_barrier.9 \ bus_space.9 bus_space_copy_region_1.9 \ bus_space.9 bus_space_copy_region_2.9 \ bus_space.9 bus_space_copy_region_4.9 \ bus_space.9 bus_space_copy_region_8.9 \ bus_space.9 bus_space_copy_region_stream_1.9 \ bus_space.9 
bus_space_copy_region_stream_2.9 \ bus_space.9 bus_space_copy_region_stream_4.9 \ bus_space.9 bus_space_copy_region_stream_8.9 \ bus_space.9 bus_space_free.9 \ bus_space.9 bus_space_map.9 \ bus_space.9 bus_space_read_1.9 \ bus_space.9 bus_space_read_2.9 \ bus_space.9 bus_space_read_4.9 \ bus_space.9 bus_space_read_8.9 \ bus_space.9 bus_space_read_multi_1.9 \ bus_space.9 bus_space_read_multi_2.9 \ bus_space.9 bus_space_read_multi_4.9 \ bus_space.9 bus_space_read_multi_8.9 \ bus_space.9 bus_space_read_multi_stream_1.9 \ bus_space.9 bus_space_read_multi_stream_2.9 \ bus_space.9 bus_space_read_multi_stream_4.9 \ bus_space.9 bus_space_read_multi_stream_8.9 \ bus_space.9 bus_space_read_region_1.9 \ bus_space.9 bus_space_read_region_2.9 \ bus_space.9 bus_space_read_region_4.9 \ bus_space.9 bus_space_read_region_8.9 \ bus_space.9 bus_space_read_region_stream_1.9 \ bus_space.9 bus_space_read_region_stream_2.9 \ bus_space.9 bus_space_read_region_stream_4.9 \ bus_space.9 bus_space_read_region_stream_8.9 \ bus_space.9 bus_space_read_stream_1.9 \ bus_space.9 bus_space_read_stream_2.9 \ bus_space.9 bus_space_read_stream_4.9 \ bus_space.9 bus_space_read_stream_8.9 \ bus_space.9 bus_space_set_multi_1.9 \ bus_space.9 bus_space_set_multi_2.9 \ bus_space.9 bus_space_set_multi_4.9 \ bus_space.9 bus_space_set_multi_8.9 \ bus_space.9 bus_space_set_multi_stream_1.9 \ bus_space.9 bus_space_set_multi_stream_2.9 \ bus_space.9 bus_space_set_multi_stream_4.9 \ bus_space.9 bus_space_set_multi_stream_8.9 \ bus_space.9 bus_space_set_region_1.9 \ bus_space.9 bus_space_set_region_2.9 \ bus_space.9 bus_space_set_region_4.9 \ bus_space.9 bus_space_set_region_8.9 \ bus_space.9 bus_space_set_region_stream_1.9 \ bus_space.9 bus_space_set_region_stream_2.9 \ bus_space.9 bus_space_set_region_stream_4.9 \ bus_space.9 bus_space_set_region_stream_8.9 \ bus_space.9 bus_space_subregion.9 \ bus_space.9 bus_space_unmap.9 \ bus_space.9 bus_space_write_1.9 \ bus_space.9 bus_space_write_2.9 \ bus_space.9 bus_space_write_4.9 \ bus_space.9 bus_space_write_8.9 \ bus_space.9 bus_space_write_multi_1.9 \ bus_space.9 bus_space_write_multi_2.9 \ bus_space.9 bus_space_write_multi_4.9 \ bus_space.9 bus_space_write_multi_8.9 \ bus_space.9 bus_space_write_multi_stream_1.9 \ bus_space.9 bus_space_write_multi_stream_2.9 \ bus_space.9 bus_space_write_multi_stream_4.9 \ bus_space.9 bus_space_write_multi_stream_8.9 \ bus_space.9 bus_space_write_region_1.9 \ bus_space.9 bus_space_write_region_2.9 \ bus_space.9 bus_space_write_region_4.9 \ bus_space.9 bus_space_write_region_8.9 \ bus_space.9 bus_space_write_region_stream_1.9 \ bus_space.9 bus_space_write_region_stream_2.9 \ bus_space.9 bus_space_write_region_stream_4.9 \ bus_space.9 bus_space_write_region_stream_8.9 \ bus_space.9 bus_space_write_stream_1.9 \ bus_space.9 bus_space_write_stream_2.9 \ bus_space.9 bus_space_write_stream_4.9 \ bus_space.9 bus_space_write_stream_8.9 MLINKS+=byteorder.9 be16dec.9 \ byteorder.9 be16enc.9 \ byteorder.9 be16toh.9 \ byteorder.9 be32dec.9 \ byteorder.9 be32enc.9 \ byteorder.9 be32toh.9 \ byteorder.9 be64dec.9 \ byteorder.9 be64enc.9 \ byteorder.9 be64toh.9 \ byteorder.9 bswap16.9 \ byteorder.9 bswap32.9 \ byteorder.9 bswap64.9 \ byteorder.9 htobe16.9 \ byteorder.9 htobe32.9 \ byteorder.9 htobe64.9 \ byteorder.9 htole16.9 \ byteorder.9 htole32.9 \ byteorder.9 htole64.9 \ byteorder.9 le16dec.9 \ byteorder.9 le16enc.9 \ byteorder.9 le16toh.9 \ byteorder.9 le32dec.9 \ byteorder.9 le32enc.9 \ byteorder.9 le32toh.9 \ byteorder.9 le64dec.9 \ byteorder.9 le64enc.9 \ 
byteorder.9 le64toh.9 MLINKS+=cnv.9 cnvlist.9 \ cnv.9 cnvlist_free_binary.9 \ cnv.9 cnvlist_free_bool.9 \ cnv.9 cnvlist_free_bool_array.9 \ cnv.9 cnvlist_free_descriptor.9 \ cnv.9 cnvlist_free_descriptor_array.9 \ cnv.9 cnvlist_free_null.9 \ cnv.9 cnvlist_free_number.9 \ cnv.9 cnvlist_free_number_array.9 \ cnv.9 cnvlist_free_nvlist.9 \ cnv.9 cnvlist_free_nvlist_array.9 \ cnv.9 cnvlist_free_string.9 \ cnv.9 cnvlist_free_string_array.9 \ cnv.9 cnvlist_get_binary.9 \ cnv.9 cnvlist_get_bool.9 \ cnv.9 cnvlist_get_bool_array.9 \ cnv.9 cnvlist_get_descriptor.9 \ cnv.9 cnvlist_get_descriptor_array.9 \ cnv.9 cnvlist_get_number.9 \ cnv.9 cnvlist_get_number_array.9 \ cnv.9 cnvlist_get_nvlist.9 \ cnv.9 cnvlist_get_nvlist_array.9 \ cnv.9 cnvlist_get_string.9 \ cnv.9 cnvlist_get_string_array.9 \ cnv.9 cnvlist_take_binary.9 \ cnv.9 cnvlist_take_bool.9 \ cnv.9 cnvlist_take_bool_array.9 \ cnv.9 cnvlist_take_descriptor.9 \ cnv.9 cnvlist_take_descriptor_array.9 \ cnv.9 cnvlist_take_number.9 \ cnv.9 cnvlist_take_number_array.9 \ cnv.9 cnvlist_take_nvlist.9 \ cnv.9 cnvlist_take_nvlist_array.9 \ cnv.9 cnvlist_take_string.9 \ cnv.9 cnvlist_take_string_array.9 MLINKS+=condvar.9 cv_broadcast.9 \ condvar.9 cv_broadcastpri.9 \ condvar.9 cv_destroy.9 \ condvar.9 cv_init.9 \ condvar.9 cv_signal.9 \ condvar.9 cv_timedwait.9 \ condvar.9 cv_timedwait_sig.9 \ condvar.9 cv_timedwait_sig_sbt.9 \ condvar.9 cv_wait.9 \ condvar.9 cv_wait_sig.9 \ condvar.9 cv_wait_unlock.9 \ condvar.9 cv_wmesg.9 MLINKS+=config_intrhook.9 config_intrhook_disestablish.9 \ config_intrhook.9 config_intrhook_establish.9 \ config_intrhook.9 config_intrhook_oneshot.9 MLINKS+=contigmalloc.9 contigmalloc_domainset.9 \ contigmalloc.9 contigfree.9 MLINKS+=casuword.9 casueword.9 \ casuword.9 casueword32.9 \ casuword.9 casuword32.9 MLINKS+=copy.9 copyin.9 \ copy.9 copyin_nofault.9 \ copy.9 copyinstr.9 \ copy.9 copyout.9 \ copy.9 copyout_nofault.9 \ copy.9 copystr.9 MLINKS+=counter.9 counter_u64_alloc.9 \ counter.9 counter_u64_free.9 \ counter.9 counter_u64_add.9 \ counter.9 counter_enter.9 \ counter.9 counter_exit.9 \ counter.9 counter_u64_add_protected.9 \ counter.9 counter_u64_fetch.9 \ counter.9 counter_u64_zero.9 \ counter.9 SYSCTL_COUNTER_U64.9 \ counter.9 SYSCTL_ADD_COUNTER_U64.9 \ counter.9 SYSCTL_COUNTER_U64_ARRAY.9 \ counter.9 SYSCTL_ADD_COUNTER_U64_ARRAY.9 MLINKS+=cpuset.9 CPUSET_T_INITIALIZER.9 \ cpuset.9 CPUSET_FSET.9 \ cpuset.9 CPU_CLR.9 \ cpuset.9 CPU_COPY.9 \ cpuset.9 CPU_ISSET.9 \ cpuset.9 CPU_SET.9 \ cpuset.9 CPU_ZERO.9 \ cpuset.9 CPU_FILL.9 \ cpuset.9 CPU_SETOF.9 \ cpuset.9 CPU_EMPTY.9 \ cpuset.9 CPU_ISFULLSET.9 \ cpuset.9 CPU_FFS.9 \ cpuset.9 CPU_COUNT.9 \ cpuset.9 CPU_SUBSET.9 \ cpuset.9 CPU_OVERLAP.9 \ cpuset.9 CPU_CMP.9 \ cpuset.9 CPU_OR.9 \ cpuset.9 CPU_AND.9 \ cpuset.9 CPU_NAND.9 \ cpuset.9 CPU_CLR_ATOMIC.9 \ cpuset.9 CPU_SET_ATOMIC.9 \ cpuset.9 CPU_SET_ATOMIC_ACQ.9 \ cpuset.9 CPU_AND_ATOMIC.9 \ cpuset.9 CPU_OR_ATOMIC.9 \ cpuset.9 CPU_COPY_STORE_REL.9 MLINKS+=critical_enter.9 critical.9 \ critical_enter.9 critical_exit.9 MLINKS+=crypto.9 crypto_dispatch.9 \ crypto.9 crypto_done.9 \ crypto.9 crypto_freereq.9 \ crypto.9 crypto_freesession.9 \ crypto.9 crypto_get_driverid.9 \ crypto.9 crypto_getreq.9 \ crypto.9 crypto_kdispatch.9 \ crypto.9 crypto_kdone.9 \ crypto.9 crypto_kregister.9 \ crypto.9 crypto_newsession.9 \ crypto.9 crypto_register.9 \ crypto.9 crypto_unblock.9 \ crypto.9 crypto_unregister.9 \ crypto.9 crypto_unregister_all.9 MLINKS+=DB_COMMAND.9 DB_SHOW_ALL_COMMAND.9 \ DB_COMMAND.9 DB_SHOW_COMMAND.9 MLINKS+=DECLARE_MODULE.9 
DECLARE_MODULE_TIED.9 MLINKS+=dev_clone.9 drain_dev_clone_events.9 MLINKS+=dev_refthread.9 devvn_refthread.9 \ dev_refthread.9 dev_relthread.9 MLINKS+=devfs_set_cdevpriv.9 devfs_clear_cdevpriv.9 \ devfs_set_cdevpriv.9 devfs_get_cdevpriv.9 MLINKS+=device_add_child.9 device_add_child_ordered.9 MLINKS+=device_enable.9 device_disable.9 \ device_enable.9 device_is_enabled.9 MLINKS+=device_get_ivars.9 device_set_ivars.9 MLINKS+=device_get_name.9 device_get_nameunit.9 MLINKS+=device_get_state.9 device_busy.9 \ device_get_state.9 device_is_alive.9 \ device_get_state.9 device_is_attached.9 \ device_get_state.9 device_unbusy.9 MLINKS+=device_get_sysctl.9 device_get_sysctl_ctx.9 \ device_get_sysctl.9 device_get_sysctl_tree.9 MLINKS+=device_quiet.9 device_is_quiet.9 \ device_quiet.9 device_verbose.9 MLINKS+=device_set_desc.9 device_get_desc.9 \ device_set_desc.9 device_set_desc_copy.9 MLINKS+=device_set_flags.9 device_get_flags.9 MLINKS+=devstat.9 devicestat.9 \ devstat.9 devstat_add_entry.9 \ devstat.9 devstat_end_transaction.9 \ devstat.9 devstat_remove_entry.9 \ devstat.9 devstat_start_transaction.9 MLINKS+=disk.9 disk_add_alias.9 \ disk.9 disk_alloc.9 \ disk.9 disk_create.9 \ disk.9 disk_destroy.9 \ disk.9 disk_gone.9 \ disk.9 disk_resize.9 MLINKS+=dnv.9 dnvlist.9 \ dnv.9 dnvlist_get_binary.9 \ dnv.9 dnvlist_get_bool.9 \ dnv.9 dnvlist_get_descriptor.9 \ dnv.9 dnvlist_get_number.9 \ dnv.9 dnvlist_get_nvlist.9 \ dnv.9 dnvlist_get_string.9 \ dnv.9 dnvlist_take_binary.9 \ dnv.9 dnvlist_take_bool.9 \ dnv.9 dnvlist_take_descriptor.9 \ dnv.9 dnvlist_take_number.9 \ dnv.9 dnvlist_take_nvlist.9 \ dnv.9 dnvlist_take_string.9 MLINKS+=domain.9 DOMAIN_SET.9 \ domain.9 domain_add.9 \ domain.9 pfctlinput.9 \ domain.9 pfctlinput2.9 \ domain.9 pffinddomain.9 \ domain.9 pffindproto.9 \ domain.9 pffindtype.9 MLINKS+=drbr.9 drbr_free.9 \ drbr.9 drbr_enqueue.9 \ drbr.9 drbr_dequeue.9 \ drbr.9 drbr_dequeue_cond.9 \ drbr.9 drbr_flush.9 \ drbr.9 drbr_empty.9 \ drbr.9 drbr_inuse.9 \ drbr.9 drbr_stats_update.9 MLINKS+=DRIVER_MODULE.9 DRIVER_MODULE_ORDERED.9 \ DRIVER_MODULE.9 EARLY_DRIVER_MODULE.9 \ DRIVER_MODULE.9 EARLY_DRIVER_MODULE_ORDERED.9 MLINKS+=epoch.9 epoch_context.9 \ epoch.9 epoch_alloc.9 \ epoch.9 epoch_free.9 \ epoch.9 epoch_enter.9 \ epoch.9 epoch_exit.9 \ epoch.9 epoch_wait.9 \ epoch.9 epoch_call.9 \ epoch.9 epoch_drain_callbacks.9 \ epoch.9 in_epoch.9 MLINKS+=EVENTHANDLER.9 EVENTHANDLER_DECLARE.9 \ EVENTHANDLER.9 EVENTHANDLER_DEFINE.9 \ EVENTHANDLER.9 EVENTHANDLER_DEREGISTER.9 \ EVENTHANDLER.9 eventhandler_deregister.9 \ EVENTHANDLER.9 eventhandler_find_list.9 \ EVENTHANDLER.9 EVENTHANDLER_INVOKE.9 \ EVENTHANDLER.9 eventhandler_prune_list.9 \ EVENTHANDLER.9 EVENTHANDLER_REGISTER.9 \ EVENTHANDLER.9 eventhandler_register.9 MLINKS+=eventtimers.9 et_register.9 \ eventtimers.9 et_deregister.9 \ eventtimers.9 et_ban.9 \ eventtimers.9 et_find.9 \ eventtimers.9 et_free.9 \ eventtimers.9 et_init.9 \ eventtimers.9 ET_LOCK.9 \ eventtimers.9 ET_UNLOCK.9 \ eventtimers.9 et_start.9 \ eventtimers.9 et_stop.9 MLINKS+=fail.9 KFAIL_POINT_CODE.9 \ fail.9 KFAIL_POINT_ERROR.9 \ fail.9 KFAIL_POINT_GOTO.9 \ fail.9 KFAIL_POINT_RETURN.9 \ fail.9 KFAIL_POINT_RETURN_VOID.9 MLINKS+=fdt_pinctrl.9 fdt_pinctrl_configure.9 \ fdt_pinctrl.9 fdt_pinctrl_configure_by_name.9 \ fdt_pinctrl.9 fdt_pinctrl_configure_tree.9 \ fdt_pinctrl.9 fdt_pinctrl_register.9 MLINKS+=fetch.9 fubyte.9 \ fetch.9 fuword.9 \ fetch.9 fuword16.9 \ fetch.9 fuword32.9 \ fetch.9 fuword64.9 \ fetch.9 fueword.9 \ fetch.9 fueword32.9 \ fetch.9 fueword64.9 MLINKS+=firmware.9 
firmware_get.9 \ firmware.9 firmware_put.9 \ firmware.9 firmware_register.9 \ firmware.9 firmware_unregister.9 MLINKS+=fpu_kern.9 fpu_kern_alloc_ctx.9 \ fpu_kern.9 fpu_kern_free_ctx.9 \ fpu_kern.9 fpu_kern_enter.9 \ fpu_kern.9 fpu_kern_leave.9 \ fpu_kern.9 fpu_kern_thread.9 \ fpu_kern.9 is_fpu_kern_thread.9 MLINKS+=g_attach.9 g_detach.9 MLINKS+=g_bio.9 g_alloc_bio.9 \ g_bio.9 g_clone_bio.9 \ g_bio.9 g_destroy_bio.9 \ g_bio.9 g_duplicate_bio.9 \ + g_bio.9 g_format_bio.9 \ g_bio.9 g_new_bio.9 \ g_bio.9 g_print_bio.9 \ g_bio.9 g_reset_bio.9 MLINKS+=g_consumer.9 g_destroy_consumer.9 \ g_consumer.9 g_new_consumer.9 MLINKS+=g_data.9 g_read_data.9 \ g_data.9 g_write_data.9 MLINKS+=getenv.9 freeenv.9 \ getenv.9 getenv_int.9 \ getenv.9 getenv_long.9 \ getenv.9 getenv_string.9 \ getenv.9 getenv_quad.9 \ getenv.9 getenv_uint.9 \ getenv.9 getenv_ulong.9 \ getenv.9 kern_getenv.9 \ getenv.9 kern_setenv.9 \ getenv.9 kern_unsetenv.9 \ getenv.9 setenv.9 \ getenv.9 testenv.9 \ getenv.9 unsetenv.9 MLINKS+=g_event.9 g_cancel_event.9 \ g_event.9 g_post_event.9 \ g_event.9 g_waitfor_event.9 MLINKS+=g_geom.9 g_destroy_geom.9 \ g_geom.9 g_new_geomf.9 MLINKS+=g_provider.9 g_destroy_provider.9 \ g_provider.9 g_error_provider.9 \ g_provider.9 g_new_providerf.9 MLINKS+=hash.9 hash32.9 \ hash.9 hash32_buf.9 \ hash.9 hash32_str.9 \ hash.9 hash32_stre.9 \ hash.9 hash32_strn.9 \ hash.9 hash32_strne.9 \ hash.9 jenkins_hash.9 \ hash.9 jenkins_hash32.9 MLINKS+=hashinit.9 hashdestroy.9 \ hashinit.9 hashinit_flags.9 \ hashinit.9 phashinit.9 MLINKS+=hhook.9 hhook_head_register.9 \ hhook.9 hhook_head_deregister.9 \ hhook.9 hhook_head_deregister_lookup.9 \ hhook.9 hhook_run_hooks.9 \ hhook.9 HHOOKS_RUN_IF.9 \ hhook.9 HHOOKS_RUN_LOOKUP_IF.9 MLINKS+=ieee80211.9 ieee80211_ifattach.9 \ ieee80211.9 ieee80211_ifdetach.9 MLINKS+=ieee80211_amrr.9 ieee80211_amrr_choose.9 \ ieee80211_amrr.9 ieee80211_amrr_cleanup.9 \ ieee80211_amrr.9 ieee80211_amrr_init.9 \ ieee80211_amrr.9 ieee80211_amrr_node_init.9 \ ieee80211_amrr.9 ieee80211_amrr_setinterval.9 \ ieee80211_amrr.9 ieee80211_amrr_tx_complete.9 \ ieee80211_amrr.9 ieee80211_amrr_tx_update.9 MLINKS+=ieee80211_beacon.9 ieee80211_beacon_alloc.9 \ ieee80211_beacon.9 ieee80211_beacon_notify.9 \ ieee80211_beacon.9 ieee80211_beacon_update.9 MLINKS+=ieee80211_bmiss.9 ieee80211_beacon_miss.9 MLINKS+=ieee80211_crypto.9 ieee80211_crypto_available.9 \ ieee80211_crypto.9 ieee80211_crypto_decap.9 \ ieee80211_crypto.9 ieee80211_crypto_delglobalkeys.9 \ ieee80211_crypto.9 ieee80211_crypto_delkey.9 \ ieee80211_crypto.9 ieee80211_crypto_demic.9 \ ieee80211_crypto.9 ieee80211_crypto_encap.9 \ ieee80211_crypto.9 ieee80211_crypto_enmic.9 \ ieee80211_crypto.9 ieee80211_crypto_newkey.9 \ ieee80211_crypto.9 ieee80211_crypto_register.9 \ ieee80211_crypto.9 ieee80211_crypto_reload_keys.9 \ ieee80211_crypto.9 ieee80211_crypto_setkey.9 \ ieee80211_crypto.9 ieee80211_crypto_unregister.9 \ ieee80211_crypto.9 ieee80211_key_update_begin.9 \ ieee80211_crypto.9 ieee80211_key_update_end.9 \ ieee80211_crypto.9 ieee80211_notify_michael_failure.9 \ ieee80211_crypto.9 ieee80211_notify_replay_failure.9 MLINKS+=ieee80211_input.9 ieee80211_input_all.9 MLINKS+=ieee80211_node.9 ieee80211_dump_node.9 \ ieee80211_node.9 ieee80211_dump_nodes.9 \ ieee80211_node.9 ieee80211_find_rxnode.9 \ ieee80211_node.9 ieee80211_find_rxnode_withkey.9 \ ieee80211_node.9 ieee80211_free_node.9 \ ieee80211_node.9 ieee80211_iterate_nodes.9 \ ieee80211_node.9 ieee80211_ref_node.9 \ ieee80211_node.9 ieee80211_unref_node.9 MLINKS+=ieee80211_output.9 
ieee80211_process_callback.9 \ ieee80211_output.9 M_SEQNO_GET.9 \ ieee80211_output.9 M_WME_GETAC.9 MLINKS+=ieee80211_proto.9 ieee80211_new_state.9 \ ieee80211_proto.9 ieee80211_resume_all.9 \ ieee80211_proto.9 ieee80211_start_all.9 \ ieee80211_proto.9 ieee80211_stop_all.9 \ ieee80211_proto.9 ieee80211_suspend_all.9 \ ieee80211_proto.9 ieee80211_waitfor_parent.9 MLINKS+=ieee80211_radiotap.9 ieee80211_radiotap_active.9 \ ieee80211_radiotap.9 ieee80211_radiotap_active_vap.9 \ ieee80211_radiotap.9 ieee80211_radiotap_attach.9 \ ieee80211_radiotap.9 ieee80211_radiotap_tx.9 \ ieee80211_radiotap.9 radiotap.9 MLINKS+=ieee80211_regdomain.9 ieee80211_alloc_countryie.9 \ ieee80211_regdomain.9 ieee80211_init_channels.9 \ ieee80211_regdomain.9 ieee80211_sort_channels.9 MLINKS+=ieee80211_scan.9 ieee80211_add_scan.9 \ ieee80211_scan.9 ieee80211_bg_scan.9 \ ieee80211_scan.9 ieee80211_cancel_scan.9 \ ieee80211_scan.9 ieee80211_cancel_scan_any.9 \ ieee80211_scan.9 ieee80211_check_scan.9 \ ieee80211_scan.9 ieee80211_check_scan_current.9 \ ieee80211_scan.9 ieee80211_flush.9 \ ieee80211_scan.9 ieee80211_probe_curchan.9 \ ieee80211_scan.9 ieee80211_scan_assoc_fail.9 \ ieee80211_scan.9 ieee80211_scan_done.9 \ ieee80211_scan.9 ieee80211_scan_dump_channels.9 \ ieee80211_scan.9 ieee80211_scan_flush.9 \ ieee80211_scan.9 ieee80211_scan_iterate.9 \ ieee80211_scan.9 ieee80211_scan_next.9 \ ieee80211_scan.9 ieee80211_scan_timeout.9 \ ieee80211_scan.9 ieee80211_scanner_get.9 \ ieee80211_scan.9 ieee80211_scanner_register.9 \ ieee80211_scan.9 ieee80211_scanner_unregister.9 \ ieee80211_scan.9 ieee80211_scanner_unregister_all.9 \ ieee80211_scan.9 ieee80211_start_scan.9 MLINKS+=ieee80211_vap.9 ieee80211_vap_attach.9 \ ieee80211_vap.9 ieee80211_vap_detach.9 \ ieee80211_vap.9 ieee80211_vap_setup.9 MLINKS+=iflibdd.9 ifdi_attach_pre.9 \ iflibdd.9 ifdi_attach_post.9 \ iflibdd.9 ifdi_detach.9 \ iflibdd.9 ifdi_get_counter.9 \ iflibdd.9 ifdi_i2c_req.9 \ iflibdd.9 ifdi_init.9 \ iflibdd.9 ifdi_intr_enable.9 \ iflibdd.9 ifdi_intr_disable.9 \ iflibdd.9 ifdi_led_func.9 \ iflibdd.9 ifdi_link_intr_enable.9 \ iflibdd.9 ifdi_media_set.9 \ iflibdd.9 ifdi_media_status.9 \ iflibdd.9 ifdi_media_change.9 \ iflibdd.9 ifdi_mtu_set.9 \ iflibdd.9 ifdi_multi_set.9 \ iflibdd.9 ifdi_promisc_set.9 \ iflibdd.9 ifdi_queues_alloc.9 \ iflibdd.9 ifdi_queues_free.9 \ iflibdd.9 ifdi_queue_intr_enable.9 \ iflibdd.9 ifdi_resume.9 \ iflibdd.9 ifdi_rxq_setup.9 \ iflibdd.9 ifdi_stop.9 \ iflibdd.9 ifdi_suspend.9 \ iflibdd.9 ifdi_sysctl_int_delay.9 \ iflibdd.9 ifdi_timer.9 \ iflibdd.9 ifdi_txq_setup.9 \ iflibdd.9 ifdi_update_admin_status.9 \ iflibdd.9 ifdi_vf_add.9 \ iflibdd.9 ifdi_vflr_handle.9 \ iflibdd.9 ifdi_vlan_register.9 \ iflibdd.9 ifdi_vlan_unregister.9 \ iflibdd.9 ifdi_watchdog_reset.9 \ iflibdd.9 iov_init.9 \ iflibdd.9 iov_uinit.9 MLINKS+=iflibdi.9 iflib_add_int_delay_sysctl.9 \ iflibdi.9 iflib_device_attach.9 \ iflibdi.9 iflib_device_deregister.9 \ iflibdi.9 iflib_device_detach.9 \ iflibdi.9 iflib_device_suspend.9 \ iflibdi.9 iflib_device_register.9 \ iflibdi.9 iflib_device_resume.9 \ iflibdi.9 iflib_led_create.9 \ iflibdi.9 iflib_irq_alloc.9 \ iflibdi.9 iflib_irq_alloc_generic.9 \ iflibdi.9 iflib_link_intr_deferred.9 \ iflibdi.9 iflib_link_state_change.9 \ iflibdi.9 iflib_rx_intr_deferred.9 \ iflibdi.9 iflib_tx_intr_deferred.9 MLINKS+=iflibtxrx.9 isc_rxd_available.9 \ iflibtxrx.9 isc_rxd_refill.9 \ iflibtxrx.9 isc_rxd_flush.9 \ iflibtxrx.9 isc_rxd_pkt_get.9 \ iflibtxrx.9 isc_txd_credits_update.9 \ iflibtxrx.9 isc_txd_encap.9 \ iflibtxrx.9 isc_txd_flush.9 
MLINKS+=ifnet.9 if_addmulti.9 \ ifnet.9 if_alloc.9 \ ifnet.9 if_alloc_dev.9 \ ifnet.9 if_alloc_domain.9 \ ifnet.9 if_allmulti.9 \ ifnet.9 if_attach.9 \ ifnet.9 if_data.9 \ ifnet.9 IF_DEQUEUE.9 \ ifnet.9 if_delmulti.9 \ ifnet.9 if_detach.9 \ ifnet.9 if_down.9 \ ifnet.9 if_findmulti.9 \ ifnet.9 if_free.9 \ ifnet.9 if_free_type.9 \ ifnet.9 if_up.9 \ ifnet.9 ifa_free.9 \ ifnet.9 ifa_ifwithaddr.9 \ ifnet.9 ifa_ifwithdstaddr.9 \ ifnet.9 ifa_ifwithnet.9 \ ifnet.9 ifa_ref.9 \ ifnet.9 ifaddr.9 \ ifnet.9 ifaddr_byindex.9 \ ifnet.9 ifaof_ifpforaddr.9 \ ifnet.9 ifioctl.9 \ ifnet.9 ifpromisc.9 \ ifnet.9 ifqueue.9 \ ifnet.9 ifunit.9 \ ifnet.9 ifunit_ref.9 MLINKS+=insmntque.9 insmntque1.9 MLINKS+=ithread.9 ithread_add_handler.9 \ ithread.9 ithread_create.9 \ ithread.9 ithread_destroy.9 \ ithread.9 ithread_priority.9 \ ithread.9 ithread_remove_handler.9 \ ithread.9 ithread_schedule.9 MLINKS+=kernacc.9 useracc.9 MLINKS+=kernel_mount.9 free_mntarg.9 \ kernel_mount.9 kernel_vmount.9 \ kernel_mount.9 mount_arg.9 \ kernel_mount.9 mount_argb.9 \ kernel_mount.9 mount_argf.9 \ kernel_mount.9 mount_argsu.9 MLINKS+=khelp.9 khelp_add_hhook.9 \ khelp.9 KHELP_DECLARE_MOD.9 \ khelp.9 KHELP_DECLARE_MOD_UMA.9 \ khelp.9 khelp_destroy_osd.9 \ khelp.9 khelp_get_id.9 \ khelp.9 khelp_get_osd.9 \ khelp.9 khelp_init_osd.9 \ khelp.9 khelp_remove_hhook.9 MLINKS+=kobj.9 DEFINE_CLASS.9 \ kobj.9 kobj_class_compile.9 \ kobj.9 kobj_class_compile_static.9 \ kobj.9 kobj_class_free.9 \ kobj.9 kobj_create.9 \ kobj.9 kobj_delete.9 \ kobj.9 kobj_init.9 \ kobj.9 kobj_init_static.9 MLINKS+=kproc.9 kproc_create.9 \ kproc.9 kproc_exit.9 \ kproc.9 kproc_kthread_add.9 \ kproc.9 kproc_resume.9 \ kproc.9 kproc_shutdown.9 \ kproc.9 kproc_start.9 \ kproc.9 kproc_suspend.9 \ kproc.9 kproc_suspend_check.9 \ kproc.9 kthread_create.9 MLINKS+=kqueue.9 knlist_add.9 \ kqueue.9 knlist_clear.9 \ kqueue.9 knlist_delete.9 \ kqueue.9 knlist_destroy.9 \ kqueue.9 knlist_empty.9 \ kqueue.9 knlist_init.9 \ kqueue.9 knlist_init_mtx.9 \ kqueue.9 knlist_init_rw_reader.9 \ kqueue.9 knlist_remove.9 \ kqueue.9 knlist_remove_inevent.9 \ kqueue.9 knote_fdclose.9 \ kqueue.9 KNOTE_LOCKED.9 \ kqueue.9 KNOTE_UNLOCKED.9 \ kqueue.9 kqfd_register.9 \ kqueue.9 kqueue_add_filteropts.9 \ kqueue.9 kqueue_del_filteropts.9 MLINKS+=kthread.9 kthread_add.9 \ kthread.9 kthread_exit.9 \ kthread.9 kthread_resume.9 \ kthread.9 kthread_shutdown.9 \ kthread.9 kthread_start.9 \ kthread.9 kthread_suspend.9 \ kthread.9 kthread_suspend_check.9 MLINKS+=ktr.9 CTR0.9 \ ktr.9 CTR1.9 \ ktr.9 CTR2.9 \ ktr.9 CTR3.9 \ ktr.9 CTR4.9 \ ktr.9 CTR5.9 \ ktr.9 CTR6.9 MLINKS+=lock.9 lockdestroy.9 \ lock.9 lockinit.9 \ lock.9 lockmgr.9 \ lock.9 lockmgr_args.9 \ lock.9 lockmgr_args_rw.9 \ lock.9 lockmgr_assert.9 \ lock.9 lockmgr_disown.9 \ lock.9 lockmgr_printinfo.9 \ lock.9 lockmgr_recursed.9 \ lock.9 lockmgr_rw.9 \ lock.9 lockstatus.9 MLINKS+=LOCK_PROFILING.9 MUTEX_PROFILING.9 MLINKS+=make_dev.9 destroy_dev.9 \ make_dev.9 destroy_dev_drain.9 \ make_dev.9 destroy_dev_sched.9 \ make_dev.9 destroy_dev_sched_cb.9 \ make_dev.9 dev_depends.9 \ make_dev.9 make_dev_alias.9 \ make_dev.9 make_dev_alias_p.9 \ make_dev.9 make_dev_cred.9 \ make_dev.9 make_dev_credf.9 \ make_dev.9 make_dev_p.9 \ make_dev.9 make_dev_s.9 MLINKS+=malloc.9 free.9 \ malloc.9 malloc_domainset.9 \ malloc.9 free_domain.9 \ malloc.9 mallocarray.9 \ malloc.9 MALLOC_DECLARE.9 \ malloc.9 MALLOC_DEFINE.9 \ malloc.9 realloc.9 \ malloc.9 reallocf.9 MLINKS+=mbchain.9 mb_detach.9 \ mbchain.9 mb_done.9 \ mbchain.9 mb_fixhdr.9 \ mbchain.9 mb_init.9 \ mbchain.9 
mb_initm.9 \ mbchain.9 mb_put_int64be.9 \ mbchain.9 mb_put_int64le.9 \ mbchain.9 mb_put_mbuf.9 \ mbchain.9 mb_put_mem.9 \ mbchain.9 mb_put_uint16be.9 \ mbchain.9 mb_put_uint16le.9 \ mbchain.9 mb_put_uint32be.9 \ mbchain.9 mb_put_uint32le.9 \ mbchain.9 mb_put_uint8.9 \ mbchain.9 mb_put_uio.9 \ mbchain.9 mb_reserve.9 MLINKS+=\ mbuf.9 m_adj.9 \ mbuf.9 m_align.9 \ mbuf.9 M_ALIGN.9 \ mbuf.9 m_append.9 \ mbuf.9 m_apply.9 \ mbuf.9 m_cat.9 \ mbuf.9 m_catpkt.9 \ mbuf.9 MCHTYPE.9 \ mbuf.9 MCLGET.9 \ mbuf.9 m_collapse.9 \ mbuf.9 m_copyback.9 \ mbuf.9 m_copydata.9 \ mbuf.9 m_copym.9 \ mbuf.9 m_copypacket.9 \ mbuf.9 m_copyup.9 \ mbuf.9 m_defrag.9 \ mbuf.9 m_devget.9 \ mbuf.9 m_dup.9 \ mbuf.9 m_dup_pkthdr.9 \ mbuf.9 MEXTADD.9 \ mbuf.9 m_fixhdr.9 \ mbuf.9 m_free.9 \ mbuf.9 m_freem.9 \ mbuf.9 MGET.9 \ mbuf.9 m_get.9 \ mbuf.9 m_get2.9 \ mbuf.9 m_getjcl.9 \ mbuf.9 m_getcl.9 \ mbuf.9 MGETHDR.9 \ mbuf.9 m_gethdr.9 \ mbuf.9 m_getm.9 \ mbuf.9 m_getptr.9 \ mbuf.9 MH_ALIGN.9 \ mbuf.9 M_LEADINGSPACE.9 \ mbuf.9 m_length.9 \ mbuf.9 M_MOVE_PKTHDR.9 \ mbuf.9 m_move_pkthdr.9 \ mbuf.9 M_PREPEND.9 \ mbuf.9 m_prepend.9 \ mbuf.9 m_pulldown.9 \ mbuf.9 m_pullup.9 \ mbuf.9 m_split.9 \ mbuf.9 mtod.9 \ mbuf.9 M_TRAILINGSPACE.9 \ mbuf.9 m_unshare.9 \ mbuf.9 M_WRITABLE.9 MLINKS+=\ mbuf_tags.9 m_tag_alloc.9 \ mbuf_tags.9 m_tag_copy.9 \ mbuf_tags.9 m_tag_copy_chain.9 \ mbuf_tags.9 m_tag_delete.9 \ mbuf_tags.9 m_tag_delete_chain.9 \ mbuf_tags.9 m_tag_delete_nonpersistent.9 \ mbuf_tags.9 m_tag_find.9 \ mbuf_tags.9 m_tag_first.9 \ mbuf_tags.9 m_tag_free.9 \ mbuf_tags.9 m_tag_get.9 \ mbuf_tags.9 m_tag_init.9 \ mbuf_tags.9 m_tag_locate.9 \ mbuf_tags.9 m_tag_next.9 \ mbuf_tags.9 m_tag_prepend.9 \ mbuf_tags.9 m_tag_unlink.9 MLINKS+=MD5.9 MD5Init.9 \ MD5.9 MD5Transform.9 MLINKS+=mdchain.9 md_append_record.9 \ mdchain.9 md_done.9 \ mdchain.9 md_get_int64.9 \ mdchain.9 md_get_int64be.9 \ mdchain.9 md_get_int64le.9 \ mdchain.9 md_get_mbuf.9 \ mdchain.9 md_get_mem.9 \ mdchain.9 md_get_uint16.9 \ mdchain.9 md_get_uint16be.9 \ mdchain.9 md_get_uint16le.9 \ mdchain.9 md_get_uint32.9 \ mdchain.9 md_get_uint32be.9 \ mdchain.9 md_get_uint32le.9 \ mdchain.9 md_get_uint8.9 \ mdchain.9 md_get_uio.9 \ mdchain.9 md_initm.9 \ mdchain.9 md_next_record.9 MLINKS+=microtime.9 bintime.9 \ microtime.9 getbintime.9 \ microtime.9 getmicrotime.9 \ microtime.9 getnanotime.9 \ microtime.9 nanotime.9 MLINKS+=microuptime.9 binuptime.9 \ microuptime.9 getbinuptime.9 \ microuptime.9 getmicrouptime.9 \ microuptime.9 getnanouptime.9 \ microuptime.9 getsbinuptime.9 \ microuptime.9 nanouptime.9 \ microuptime.9 sbinuptime.9 MLINKS+=mi_switch.9 cpu_switch.9 \ mi_switch.9 cpu_throw.9 MLINKS+=mod_cc.9 CCV.9 \ mod_cc.9 DECLARE_CC_MODULE.9 MLINKS+=mtx_pool.9 mtx_pool_alloc.9 \ mtx_pool.9 mtx_pool_create.9 \ mtx_pool.9 mtx_pool_destroy.9 \ mtx_pool.9 mtx_pool_find.9 \ mtx_pool.9 mtx_pool_lock.9 \ mtx_pool.9 mtx_pool_lock_spin.9 \ mtx_pool.9 mtx_pool_unlock.9 \ mtx_pool.9 mtx_pool_unlock_spin.9 MLINKS+=mutex.9 mtx_assert.9 \ mutex.9 mtx_destroy.9 \ mutex.9 mtx_init.9 \ mutex.9 mtx_initialized.9 \ mutex.9 mtx_lock.9 \ mutex.9 mtx_lock_flags.9 \ mutex.9 mtx_lock_spin.9 \ mutex.9 mtx_lock_spin_flags.9 \ mutex.9 mtx_owned.9 \ mutex.9 mtx_recursed.9 \ mutex.9 mtx_sleep.9 \ mutex.9 MTX_SYSINIT.9 \ mutex.9 mtx_trylock.9 \ mutex.9 mtx_trylock_flags.9 \ mutex.9 mtx_trylock_spin.9 \ mutex.9 mtx_trylock_spin_flags.9 \ mutex.9 mtx_unlock.9 \ mutex.9 mtx_unlock_flags.9 \ mutex.9 mtx_unlock_spin.9 \ mutex.9 mtx_unlock_spin_flags.9 MLINKS+=namei.9 NDFREE.9 \ namei.9 NDINIT.9 MLINKS+=netisr.9 
netisr_clearqdrops.9 \ netisr.9 netisr_default_flow2cpu.9 \ netisr.9 netisr_dispatch.9 \ netisr.9 netisr_dispatch_src.9 \ netisr.9 netisr_get_cpucount.9 \ netisr.9 netisr_get_cpuid.9 \ netisr.9 netisr_getqdrops.9 \ netisr.9 netisr_getqlimit.9 \ netisr.9 netisr_queue.9 \ netisr.9 netisr_queue_src.9 \ netisr.9 netisr_register.9 \ netisr.9 netisr_setqlimit.9 \ netisr.9 netisr_unregister.9 MLINKS+=nv.9 libnv.9 \ nv.9 nvlist.9 \ nv.9 nvlist_add_binary.9 \ nv.9 nvlist_add_bool.9 \ nv.9 nvlist_add_bool_array.9 \ nv.9 nvlist_add_descriptor.9 \ nv.9 nvlist_add_descriptor_array.9 \ nv.9 nvlist_add_null.9 \ nv.9 nvlist_add_number.9 \ nv.9 nvlist_add_number_array.9 \ nv.9 nvlist_add_nvlist.9 \ nv.9 nvlist_add_nvlist_array.9 \ nv.9 nvlist_add_string.9 \ nv.9 nvlist_add_stringf.9 \ nv.9 nvlist_add_stringv.9 \ nv.9 nvlist_add_string_array.9 \ nv.9 nvlist_clone.9 \ nv.9 nvlist_create.9 \ nv.9 nvlist_destroy.9 \ nv.9 nvlist_dump.9 \ nv.9 nvlist_empty.9 \ nv.9 nvlist_error.9 \ nv.9 nvlist_exists.9 \ nv.9 nvlist_exists_binary.9 \ nv.9 nvlist_exists_bool.9 \ nv.9 nvlist_exists_bool_array.9 \ nv.9 nvlist_exists_descriptor.9 \ nv.9 nvlist_exists_descriptor_array.9 \ nv.9 nvlist_exists_null.9 \ nv.9 nvlist_exists_number.9 \ nv.9 nvlist_exists_number_array.9 \ nv.9 nvlist_exists_nvlist.9 \ nv.9 nvlist_exists_nvlist_array.9 \ nv.9 nvlist_exists_string.9 \ nv.9 nvlist_exists_type.9 \ nv.9 nvlist_fdump.9 \ nv.9 nvlist_flags.9 \ nv.9 nvlist_free.9 \ nv.9 nvlist_free_binary.9 \ nv.9 nvlist_free_bool.9 \ nv.9 nvlist_free_bool_array.9 \ nv.9 nvlist_free_descriptor.9 \ nv.9 nvlist_free_descriptor_array.9 \ nv.9 nvlist_free_null.9 \ nv.9 nvlist_free_number.9 \ nv.9 nvlist_free_number_array.9 \ nv.9 nvlist_free_nvlist.9 \ nv.9 nvlist_free_nvlist_array.9 \ nv.9 nvlist_free_string.9 \ nv.9 nvlist_free_string_array.9 \ nv.9 nvlist_free_type.9 \ nv.9 nvlist_get_binary.9 \ nv.9 nvlist_get_bool.9 \ nv.9 nvlist_get_bool_array.9 \ nv.9 nvlist_get_descriptor.9 \ nv.9 nvlist_get_descriptor_array.9 \ nv.9 nvlist_get_number.9 \ nv.9 nvlist_get_number_array.9 \ nv.9 nvlist_get_nvlist.9 \ nv.9 nvlist_get_nvlist_array.9 \ nv.9 nvlist_get_parent.9 \ nv.9 nvlist_get_string.9 \ nv.9 nvlist_get_string_array.9 \ nv.9 nvlist_move_binary.9 \ nv.9 nvlist_move_descriptor.9 \ nv.9 nvlist_move_descriptor_array.9 \ nv.9 nvlist_move_nvlist.9 \ nv.9 nvlist_move_nvlist_array.9 \ nv.9 nvlist_move_string.9 \ nv.9 nvlist_move_string_array.9 \ nv.9 nvlist_next.9 \ nv.9 nvlist_pack.9 \ nv.9 nvlist_recv.9 \ nv.9 nvlist_send.9 \ nv.9 nvlist_set_error.9 \ nv.9 nvlist_size.9 \ nv.9 nvlist_take_binary.9 \ nv.9 nvlist_take_bool.9 \ nv.9 nvlist_take_bool_array.9 \ nv.9 nvlist_take_descriptor.9 \ nv.9 nvlist_take_descriptor_array.9 \ nv.9 nvlist_take_number.9 \ nv.9 nvlist_take_number_array.9 \ nv.9 nvlist_take_nvlist.9 \ nv.9 nvlist_take_nvlist_array.9 \ nv.9 nvlist_take_string.9 \ nv.9 nvlist_take_string_array.9 \ nv.9 nvlist_unpack.9 \ nv.9 nvlist_xfer.9 MLINKS+=OF_child.9 OF_parent.9 \ OF_child.9 OF_peer.9 MLINKS+=OF_device_from_xref.9 OF_device_register_xref.9 \ OF_device_from_xref.9 OF_xref_from_device.9 MLINKS+=OF_getprop.9 OF_getencprop.9 \ OF_getprop.9 OF_getencprop_alloc.9 \ OF_getprop.9 OF_getencprop_alloc_multi.9 \ OF_getprop.9 OF_getprop_alloc.9 \ OF_getprop.9 OF_getprop_alloc_multi.9 \ OF_getprop.9 OF_getproplen.9 \ OF_getprop.9 OF_hasprop.9 \ OF_getprop.9 OF_nextprop.9 \ OF_getprop.9 OF_prop_free.9 \ OF_getprop.9 OF_searchencprop.9 \ OF_getprop.9 OF_searchprop.9 \ OF_getprop.9 OF_setprop.9 MLINKS+=OF_node_from_xref.9 OF_xref_from_node.9 
MLINKS+=ofw_bus_is_compatible.9 ofw_bus_is_compatible_strict.9 \ ofw_bus_is_compatible.9 ofw_bus_node_is_compatible.9 \ ofw_bus_is_compatible.9 ofw_bus_search_compatible.9 MLINKS+= ofw_bus_status_okay.9 ofw_bus_get_status.9 \ ofw_bus_status_okay.9 ofw_bus_node_status_okay.9 MLINKS+=osd.9 osd_call.9 \ osd.9 osd_del.9 \ osd.9 osd_deregister.9 \ osd.9 osd_exit.9 \ osd.9 osd_get.9 \ osd.9 osd_register.9 \ osd.9 osd_set.9 MLINKS+=panic.9 vpanic.9 MLINKS+=PCBGROUP.9 in_pcbgroup_byhash.9 \ PCBGROUP.9 in_pcbgroup_byinpcb.9 \ PCBGROUP.9 in_pcbgroup_destroy.9 \ PCBGROUP.9 in_pcbgroup_enabled.9 \ PCBGROUP.9 in_pcbgroup_init.9 \ PCBGROUP.9 in_pcbgroup_remove.9 \ PCBGROUP.9 in_pcbgroup_update.9 \ PCBGROUP.9 in_pcbgroup_update_mbuf.9 \ PCBGROUP.9 in6_pcbgroup_byhash.9 MLINKS+=pci.9 pci_alloc_msi.9 \ pci.9 pci_alloc_msix.9 \ pci.9 pci_disable_busmaster.9 \ pci.9 pci_disable_io.9 \ pci.9 pci_enable_busmaster.9 \ pci.9 pci_enable_io.9 \ pci.9 pci_find_bsf.9 \ pci.9 pci_find_cap.9 \ pci.9 pci_find_dbsf.9 \ pci.9 pci_find_device.9 \ pci.9 pci_find_extcap.9 \ pci.9 pci_find_htcap.9 \ pci.9 pci_find_pcie_root_port.9 \ pci.9 pci_get_id.9 \ pci.9 pci_get_max_read_req.9 \ pci.9 pci_get_powerstate.9 \ pci.9 pci_get_vpd_ident.9 \ pci.9 pci_get_vpd_readonly.9 \ pci.9 pci_iov_attach.9 \ pci.9 pci_iov_attach_name.9 \ pci.9 pci_iov_detach.9 \ pci.9 pci_msi_count.9 \ pci.9 pci_msix_count.9 \ pci.9 pci_msix_pba_bar.9 \ pci.9 pci_msix_table_bar.9 \ pci.9 pci_pending_msix.9 \ pci.9 pci_read_config.9 \ pci.9 pci_release_msi.9 \ pci.9 pci_remap_msix.9 \ pci.9 pci_restore_state.9 \ pci.9 pci_save_state.9 \ pci.9 pci_set_powerstate.9 \ pci.9 pci_set_max_read_req.9 \ pci.9 pci_write_config.9 \ pci.9 pcie_adjust_config.9 \ pci.9 pcie_flr.9 \ pci.9 pcie_max_completion_timeout.9 \ pci.9 pcie_read_config.9 \ pci.9 pcie_wait_for_pending_transactions.9 \ pci.9 pcie_write_config.9 MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \ pci_iov_schema.9 pci_iov_schema_add_bool.9 \ pci_iov_schema.9 pci_iov_schema_add_string.9 \ pci_iov_schema.9 pci_iov_schema_add_uint8.9 \ pci_iov_schema.9 pci_iov_schema_add_uint16.9 \ pci_iov_schema.9 pci_iov_schema_add_uint32.9 \ pci_iov_schema.9 pci_iov_schema_add_uint64.9 \ pci_iov_schema.9 pci_iov_schema_add_unicast_mac.9 MLINKS+=pfil.9 pfil_add_hook.9 \ pfil.9 pfil_head_register.9 \ pfil.9 pfil_head_unregister.9 \ pfil.9 pfil_remove_hook.9 \ pfil.9 pfil_run_hooks.9 \ pfil.9 pfil_link.9 MLINKS+=pfind.9 zpfind.9 MLINKS+=PHOLD.9 PRELE.9 \ PHOLD.9 _PHOLD.9 \ PHOLD.9 _PRELE.9 \ PHOLD.9 PROC_ASSERT_HELD.9 \ PHOLD.9 PROC_ASSERT_NOT_HELD.9 MLINKS+=pmap_copy.9 pmap_copy_page.9 MLINKS+=pmap_extract.9 pmap_extract_and_hold.9 MLINKS+=pmap_init.9 pmap_init2.9 MLINKS+=pmap_is_modified.9 pmap_ts_referenced.9 MLINKS+=pmap_pinit.9 pmap_pinit0.9 \ pmap_pinit.9 pmap_pinit2.9 MLINKS+=pmap_qenter.9 pmap_qremove.9 MLINKS+=pmap_quick_enter_page.9 pmap_quick_remove_page.9 MLINKS+=pmap_remove.9 pmap_remove_all.9 \ pmap_remove.9 pmap_remove_pages.9 MLINKS+=pmap_resident_count.9 pmap_wired_count.9 MLINKS+=pmap_zero_page.9 pmap_zero_area.9 MLINKS+=printf.9 log.9 \ printf.9 tprintf.9 \ printf.9 uprintf.9 MLINKS+=priv.9 priv_check.9 \ priv.9 priv_check_cred.9 MLINKS+=proc_rwmem.9 proc_readmem.9 \ proc_rwmem.9 proc_writemem.9 MLINKS+=psignal.9 gsignal.9 \ psignal.9 pgsignal.9 \ psignal.9 tdsignal.9 MLINKS+=pwmbus.9 pwm.9 MLINKS+=random.9 arc4rand.9 \ random.9 arc4random.9 \ random.9 is_random_seeded.9 \ random.9 read_random.9 \ random.9 read_random_uio.9 \ random.9 srandom.9 MLINKS+=random_harvest.9 random_harvest_direct.9 \ 
random_harvest.9 random_harvest_fast.9 \ random_harvest.9 random_harvest_queue.9 MLINKS+=ratecheck.9 ppsratecheck.9 MLINKS+=refcount.9 refcount_acquire.9 \ refcount.9 refcount_init.9 \ refcount.9 refcount_release.9 MLINKS+=resource_int_value.9 resource_long_value.9 \ resource_int_value.9 resource_string_value.9 MLINKS+=rman.9 rman_activate_resource.9 \ rman.9 rman_adjust_resource.9 \ rman.9 rman_deactivate_resource.9 \ rman.9 rman_fini.9 \ rman.9 rman_first_free_region.9 \ rman.9 rman_get_bushandle.9 \ rman.9 rman_get_bustag.9 \ rman.9 rman_get_device.9 \ rman.9 rman_get_end.9 \ rman.9 rman_get_flags.9 \ rman.9 rman_get_mapping.9 \ rman.9 rman_get_rid.9 \ rman.9 rman_get_size.9 \ rman.9 rman_get_start.9 \ rman.9 rman_get_virtual.9 \ rman.9 rman_init.9 \ rman.9 rman_init_from_resource.9 \ rman.9 rman_is_region_manager.9 \ rman.9 rman_last_free_region.9 \ rman.9 rman_make_alignment_flags.9 \ rman.9 rman_manage_region.9 \ rman.9 rman_release_resource.9 \ rman.9 rman_reserve_resource.9 \ rman.9 rman_reserve_resource_bound.9 \ rman.9 rman_set_bushandle.9 \ rman.9 rman_set_bustag.9 \ rman.9 rman_set_mapping.9 \ rman.9 rman_set_rid.9 \ rman.9 rman_set_virtual.9 MLINKS+=rmlock.9 rm_assert.9 \ rmlock.9 rm_destroy.9 \ rmlock.9 rm_init.9 \ rmlock.9 rm_init_flags.9 \ rmlock.9 rm_rlock.9 \ rmlock.9 rm_runlock.9 \ rmlock.9 rm_sleep.9 \ rmlock.9 RM_SYSINIT.9 \ rmlock.9 RM_SYSINIT_FLAGS.9 \ rmlock.9 rm_try_rlock.9 \ rmlock.9 rm_wlock.9 \ rmlock.9 rm_wowned.9 \ rmlock.9 rm_wunlock.9 MLINKS+=rtalloc.9 rtalloc1.9 \ rtalloc.9 rtalloc_ign.9 \ rtalloc.9 RT_ADDREF.9 \ rtalloc.9 RT_LOCK.9 \ rtalloc.9 RT_REMREF.9 \ rtalloc.9 RT_RTFREE.9 \ rtalloc.9 RT_UNLOCK.9 \ rtalloc.9 RTFREE_LOCKED.9 \ rtalloc.9 RTFREE.9 \ rtalloc.9 rtfree.9 \ rtalloc.9 rtalloc1_fib.9 \ rtalloc.9 rtalloc_ign_fib.9 \ rtalloc.9 rtalloc_fib.9 MLINKS+=runqueue.9 choosethread.9 \ runqueue.9 procrunnable.9 \ runqueue.9 remrunqueue.9 \ runqueue.9 setrunqueue.9 MLINKS+=rwlock.9 rw_assert.9 \ rwlock.9 rw_destroy.9 \ rwlock.9 rw_downgrade.9 \ rwlock.9 rw_init.9 \ rwlock.9 rw_init_flags.9 \ rwlock.9 rw_initialized.9 \ rwlock.9 rw_rlock.9 \ rwlock.9 rw_runlock.9 \ rwlock.9 rw_unlock.9 \ rwlock.9 rw_sleep.9 \ rwlock.9 RW_SYSINIT.9 \ rwlock.9 RW_SYSINIT_FLAGS.9 \ rwlock.9 rw_try_rlock.9 \ rwlock.9 rw_try_upgrade.9 \ rwlock.9 rw_try_wlock.9 \ rwlock.9 rw_wlock.9 \ rwlock.9 rw_wowned.9 \ rwlock.9 rw_wunlock.9 MLINKS+=sbuf.9 sbuf_bcat.9 \ sbuf.9 sbuf_bcopyin.9 \ sbuf.9 sbuf_bcpy.9 \ sbuf.9 sbuf_cat.9 \ sbuf.9 sbuf_clear.9 \ sbuf.9 sbuf_clear_flags.9 \ sbuf.9 sbuf_copyin.9 \ sbuf.9 sbuf_cpy.9 \ sbuf.9 sbuf_data.9 \ sbuf.9 sbuf_delete.9 \ sbuf.9 sbuf_done.9 \ sbuf.9 sbuf_error.9 \ sbuf.9 sbuf_finish.9 \ sbuf.9 sbuf_get_flags.9 \ sbuf.9 sbuf_hexdump.9 \ sbuf.9 sbuf_len.9 \ sbuf.9 sbuf_new.9 \ sbuf.9 sbuf_new_auto.9 \ sbuf.9 sbuf_new_for_sysctl.9 \ sbuf.9 sbuf_printf.9 \ sbuf.9 sbuf_printf_drain.9 \ sbuf.9 sbuf_putbuf.9 \ sbuf.9 sbuf_putc.9 \ sbuf.9 sbuf_set_drain.9 \ sbuf.9 sbuf_set_flags.9 \ sbuf.9 sbuf_setpos.9 \ sbuf.9 sbuf_start_section.9 \ sbuf.9 sbuf_end_section.9 \ sbuf.9 sbuf_trim.9 \ sbuf.9 sbuf_vprintf.9 MLINKS+=scheduler.9 curpriority_cmp.9 \ scheduler.9 maybe_resched.9 \ scheduler.9 propagate_priority.9 \ scheduler.9 resetpriority.9 \ scheduler.9 roundrobin.9 \ scheduler.9 roundrobin_interval.9 \ scheduler.9 schedclock.9 \ scheduler.9 schedcpu.9 \ scheduler.9 sched_setup.9 \ scheduler.9 setrunnable.9 \ scheduler.9 updatepri.9 MLINKS+=SDT.9 SDT_PROVIDER_DECLARE.9 \ SDT.9 SDT_PROVIDER_DEFINE.9 \ SDT.9 SDT_PROBE_DECLARE.9 \ SDT.9 SDT_PROBE_DEFINE.9 \ 
SDT.9 SDT_PROBE.9 MLINKS+=securelevel_gt.9 securelevel_ge.9 MLINKS+=selrecord.9 seldrain.9 \ selrecord.9 selwakeup.9 MLINKS+=sema.9 sema_destroy.9 \ sema.9 sema_init.9 \ sema.9 sema_post.9 \ sema.9 sema_timedwait.9 \ sema.9 sema_trywait.9 \ sema.9 sema_value.9 \ sema.9 sema_wait.9 MLINKS+=seqc.9 seqc_consistent.9 \ seqc.9 seqc_read.9 \ seqc.9 seqc_write_begin.9 \ seqc.9 seqc_write_end.9 MLINKS+=sf_buf.9 sf_buf_alloc.9 \ sf_buf.9 sf_buf_free.9 \ sf_buf.9 sf_buf_kva.9 \ sf_buf.9 sf_buf_page.9 MLINKS+=sglist.9 sglist_alloc.9 \ sglist.9 sglist_append.9 \ sglist.9 sglist_append_bio.9 \ sglist.9 sglist_append_ext_pgs.9 \ sglist.9 sglist_append_mb_ext_pgs.9 \ sglist.9 sglist_append_mbuf.9 \ sglist.9 sglist_append_phys.9 \ sglist.9 sglist_append_sglist.9 \ sglist.9 sglist_append_uio.9 \ sglist.9 sglist_append_user.9 \ sglist.9 sglist_append_vmpages.9 \ sglist.9 sglist_build.9 \ sglist.9 sglist_clone.9 \ sglist.9 sglist_consume_uio.9 \ sglist.9 sglist_count.9 \ sglist.9 sglist_count_ext_pgs.9 \ sglist.9 sglist_count_mb_ext_pgs.9 \ sglist.9 sglist_count_vmpages.9 \ sglist.9 sglist_free.9 \ sglist.9 sglist_hold.9 \ sglist.9 sglist_init.9 \ sglist.9 sglist_join.9 \ sglist.9 sglist_length.9 \ sglist.9 sglist_reset.9 \ sglist.9 sglist_slice.9 \ sglist.9 sglist_split.9 MLINKS+=shm_map.9 shm_unmap.9 MLINKS+=signal.9 cursig.9 \ signal.9 execsigs.9 \ signal.9 issignal.9 \ signal.9 killproc.9 \ signal.9 pgsigio.9 \ signal.9 postsig.9 \ signal.9 SETSETNEQ.9 \ signal.9 SETSETOR.9 \ signal.9 SIGADDSET.9 \ signal.9 SIG_CONTSIGMASK.9 \ signal.9 SIGDELSET.9 \ signal.9 SIGEMPTYSET.9 \ signal.9 sigexit.9 \ signal.9 SIGFILLSET.9 \ signal.9 siginit.9 \ signal.9 SIGISEMPTY.9 \ signal.9 SIGISMEMBER.9 \ signal.9 SIGNOTEMPTY.9 \ signal.9 signotify.9 \ signal.9 SIGPENDING.9 \ signal.9 SIGSETAND.9 \ signal.9 SIGSETCANTMASK.9 \ signal.9 SIGSETEQ.9 \ signal.9 SIGSETNAND.9 \ signal.9 SIG_STOPSIGMASK.9 \ signal.9 trapsignal.9 MLINKS+=sleep.9 msleep.9 \ sleep.9 msleep_sbt.9 \ sleep.9 msleep_spin.9 \ sleep.9 msleep_spin_sbt.9 \ sleep.9 pause.9 \ sleep.9 pause_sig.9 \ sleep.9 pause_sbt.9 \ sleep.9 tsleep.9 \ sleep.9 tsleep_sbt.9 \ sleep.9 wakeup.9 \ sleep.9 wakeup_one.9 \ sleep.9 wakeup_any.9 MLINKS+=sleepqueue.9 init_sleepqueues.9 \ sleepqueue.9 sleepq_abort.9 \ sleepqueue.9 sleepq_add.9 \ sleepqueue.9 sleepq_alloc.9 \ sleepqueue.9 sleepq_broadcast.9 \ sleepqueue.9 sleepq_free.9 \ sleepqueue.9 sleepq_lookup.9 \ sleepqueue.9 sleepq_lock.9 \ sleepqueue.9 sleepq_release.9 \ sleepqueue.9 sleepq_remove.9 \ sleepqueue.9 sleepq_set_timeout.9 \ sleepqueue.9 sleepq_set_timeout_sbt.9 \ sleepqueue.9 sleepq_signal.9 \ sleepqueue.9 sleepq_sleepcnt.9 \ sleepqueue.9 sleepq_timedwait.9 \ sleepqueue.9 sleepq_timedwait_sig.9 \ sleepqueue.9 sleepq_type.9 \ sleepqueue.9 sleepq_wait.9 \ sleepqueue.9 sleepq_wait_sig.9 MLINKS+=socket.9 soabort.9 \ socket.9 soaccept.9 \ socket.9 sobind.9 \ socket.9 socheckuid.9 \ socket.9 soclose.9 \ socket.9 soconnect.9 \ socket.9 socreate.9 \ socket.9 sodisconnect.9 \ socket.9 sodtor_set.9 \ socket.9 sodupsockaddr.9 \ socket.9 sofree.9 \ socket.9 sogetopt.9 \ socket.9 sohasoutofband.9 \ socket.9 solisten.9 \ socket.9 solisten_proto.9 \ socket.9 solisten_proto_check.9 \ socket.9 sonewconn.9 \ socket.9 sooptcopyin.9 \ socket.9 sooptcopyout.9 \ socket.9 sopoll.9 \ socket.9 sopoll_generic.9 \ socket.9 soreceive.9 \ socket.9 soreceive_dgram.9 \ socket.9 soreceive_generic.9 \ socket.9 soreceive_stream.9 \ socket.9 soreserve.9 \ socket.9 sorflush.9 \ socket.9 sosend.9 \ socket.9 sosend_dgram.9 \ socket.9 sosend_generic.9 \ 
socket.9 sosetopt.9 \ socket.9 soshutdown.9 \ socket.9 sotoxsocket.9 \ socket.9 soupcall_clear.9 \ socket.9 soupcall_set.9 \ socket.9 sowakeup.9 MLINKS+=stack.9 stack_copy.9 \ stack.9 stack_create.9 \ stack.9 stack_destroy.9 \ stack.9 stack_print.9 \ stack.9 stack_print_ddb.9 \ stack.9 stack_print_short.9 \ stack.9 stack_print_short_ddb.9 \ stack.9 stack_put.9 \ stack.9 stack_save.9 \ stack.9 stack_sbuf_print.9 \ stack.9 stack_sbuf_print_ddb.9 \ stack.9 stack_zero.9 MLINKS+=store.9 subyte.9 \ store.9 suword.9 \ store.9 suword16.9 \ store.9 suword32.9 \ store.9 suword64.9 MLINKS+=swi.9 swi_add.9 \ swi.9 swi_remove.9 \ swi.9 swi_sched.9 MLINKS+=sx.9 sx_assert.9 \ sx.9 sx_destroy.9 \ sx.9 sx_downgrade.9 \ sx.9 sx_init.9 \ sx.9 sx_init_flags.9 \ sx.9 sx_sleep.9 \ sx.9 sx_slock.9 \ sx.9 sx_slock_sig.9 \ sx.9 sx_sunlock.9 \ sx.9 SX_SYSINIT.9 \ sx.9 SX_SYSINIT_FLAGS.9 \ sx.9 sx_try_slock.9 \ sx.9 sx_try_upgrade.9 \ sx.9 sx_try_xlock.9 \ sx.9 sx_unlock.9 \ sx.9 sx_xholder.9 \ sx.9 sx_xlock.9 \ sx.9 sx_xlock_sig.9 \ sx.9 sx_xlocked.9 \ sx.9 sx_xunlock.9 MLINKS+=syscall_helper_register.9 syscall_helper_unregister.9 \ syscall_helper_register.9 SYSCALL_INIT_HELPER.9 \ syscall_helper_register.9 SYSCALL_INIT_HELPER_COMPAT.9 \ syscall_helper_register.9 SYSCALL_INIT_HELPER_COMPAT_F.9 \ syscall_helper_register.9 SYSCALL_INIT_HELPER_F.9 MLINKS+=sysctl.9 SYSCTL_DECL.9 \ sysctl.9 SYSCTL_ADD_INT.9 \ sysctl.9 SYSCTL_ADD_LONG.9 \ sysctl.9 SYSCTL_ADD_NODE.9 \ sysctl.9 SYSCTL_ADD_NODE_WITH_LABEL.9 \ sysctl.9 SYSCTL_ADD_OPAQUE.9 \ sysctl.9 SYSCTL_ADD_PROC.9 \ sysctl.9 SYSCTL_ADD_QUAD.9 \ sysctl.9 SYSCTL_ADD_ROOT_NODE.9 \ sysctl.9 SYSCTL_ADD_S8.9 \ sysctl.9 SYSCTL_ADD_S16.9 \ sysctl.9 SYSCTL_ADD_S32.9 \ sysctl.9 SYSCTL_ADD_S64.9 \ sysctl.9 SYSCTL_ADD_STRING.9 \ sysctl.9 SYSCTL_ADD_STRUCT.9 \ sysctl.9 SYSCTL_ADD_TIMEVAL_SEC.9 \ sysctl.9 SYSCTL_ADD_U8.9 \ sysctl.9 SYSCTL_ADD_U16.9 \ sysctl.9 SYSCTL_ADD_U32.9 \ sysctl.9 SYSCTL_ADD_U64.9 \ sysctl.9 SYSCTL_ADD_UAUTO.9 \ sysctl.9 SYSCTL_ADD_UINT.9 \ sysctl.9 SYSCTL_ADD_ULONG.9 \ sysctl.9 SYSCTL_ADD_UQUAD.9 \ sysctl.9 SYSCTL_CHILDREN.9 \ sysctl.9 SYSCTL_STATIC_CHILDREN.9 \ sysctl.9 SYSCTL_NODE_CHILDREN.9 \ sysctl.9 SYSCTL_PARENT.9 \ sysctl.9 SYSCTL_INT.9 \ sysctl.9 SYSCTL_INT_WITH_LABEL.9 \ sysctl.9 SYSCTL_LONG.9 \ sysctl.9 sysctl_msec_to_ticks.9 \ sysctl.9 SYSCTL_NODE.9 \ sysctl.9 SYSCTL_NODE_WITH_LABEL.9 \ sysctl.9 SYSCTL_OPAQUE.9 \ sysctl.9 SYSCTL_PROC.9 \ sysctl.9 SYSCTL_QUAD.9 \ sysctl.9 SYSCTL_ROOT_NODE.9 \ sysctl.9 SYSCTL_S8.9 \ sysctl.9 SYSCTL_S16.9 \ sysctl.9 SYSCTL_S32.9 \ sysctl.9 SYSCTL_S64.9 \ sysctl.9 SYSCTL_STRING.9 \ sysctl.9 SYSCTL_STRUCT.9 \ sysctl.9 SYSCTL_TIMEVAL_SEC.9 \ sysctl.9 SYSCTL_U8.9 \ sysctl.9 SYSCTL_U16.9 \ sysctl.9 SYSCTL_U32.9 \ sysctl.9 SYSCTL_U64.9 \ sysctl.9 SYSCTL_UINT.9 \ sysctl.9 SYSCTL_ULONG.9 \ sysctl.9 SYSCTL_UQUAD.9 MLINKS+=sysctl_add_oid.9 sysctl_move_oid.9 \ sysctl_add_oid.9 sysctl_remove_oid.9 \ sysctl_add_oid.9 sysctl_remove_name.9 MLINKS+=sysctl_ctx_init.9 sysctl_ctx_entry_add.9 \ sysctl_ctx_init.9 sysctl_ctx_entry_del.9 \ sysctl_ctx_init.9 sysctl_ctx_entry_find.9 \ sysctl_ctx_init.9 sysctl_ctx_free.9 MLINKS+=SYSINIT.9 SYSUNINIT.9 MLINKS+=taskqueue.9 TASK_INIT.9 \ taskqueue.9 TASK_INITIALIZER.9 \ taskqueue.9 taskqueue_block.9 \ taskqueue.9 taskqueue_cancel.9 \ taskqueue.9 taskqueue_cancel_timeout.9 \ taskqueue.9 taskqueue_create.9 \ taskqueue.9 taskqueue_create_fast.9 \ taskqueue.9 TASKQUEUE_DECLARE.9 \ taskqueue.9 TASKQUEUE_DEFINE.9 \ taskqueue.9 TASKQUEUE_DEFINE_THREAD.9 \ taskqueue.9 taskqueue_drain.9 \ taskqueue.9 
taskqueue_drain_all.9 \ taskqueue.9 taskqueue_drain_timeout.9 \ taskqueue.9 taskqueue_enqueue.9 \ taskqueue.9 taskqueue_enqueue_timeout.9 \ taskqueue.9 TASKQUEUE_FAST_DEFINE.9 \ taskqueue.9 TASKQUEUE_FAST_DEFINE_THREAD.9 \ taskqueue.9 taskqueue_free.9 \ taskqueue.9 taskqueue_member.9 \ taskqueue.9 taskqueue_quiesce.9 \ taskqueue.9 taskqueue_run.9 \ taskqueue.9 taskqueue_set_callback.9 \ taskqueue.9 taskqueue_start_threads.9 \ taskqueue.9 taskqueue_start_threads_pinned.9 \ taskqueue.9 taskqueue_unblock.9 \ taskqueue.9 TIMEOUT_TASK_INIT.9 MLINKS+=tcp_functions.9 register_tcp_functions.9 \ tcp_functions.9 register_tcp_functions_as_name.9 \ tcp_functions.9 register_tcp_functions_as_names.9 \ tcp_functions.9 deregister_tcp_functions.9 MLINKS+=time.9 boottime.9 \ time.9 time_second.9 \ time.9 time_uptime.9 MLINKS+=timeout.9 callout.9 \ timeout.9 callout_active.9 \ timeout.9 callout_async_drain.9 \ timeout.9 callout_deactivate.9 \ timeout.9 callout_drain.9 \ timeout.9 callout_handle_init.9 \ timeout.9 callout_init.9 \ timeout.9 callout_init_mtx.9 \ timeout.9 callout_init_rm.9 \ timeout.9 callout_init_rw.9 \ timeout.9 callout_pending.9 \ timeout.9 callout_reset.9 \ timeout.9 callout_reset_curcpu.9 \ timeout.9 callout_reset_on.9 \ timeout.9 callout_reset_sbt.9 \ timeout.9 callout_reset_sbt_curcpu.9 \ timeout.9 callout_reset_sbt_on.9 \ timeout.9 callout_schedule.9 \ timeout.9 callout_schedule_curcpu.9 \ timeout.9 callout_schedule_on.9 \ timeout.9 callout_schedule_sbt.9 \ timeout.9 callout_schedule_sbt_curcpu.9 \ timeout.9 callout_schedule_sbt_on.9 \ timeout.9 callout_stop.9 \ timeout.9 callout_when.9 \ timeout.9 untimeout.9 MLINKS+=ucred.9 crcopy.9 \ ucred.9 crcopysafe.9 \ ucred.9 crdup.9 \ ucred.9 crfree.9 \ ucred.9 crget.9 \ ucred.9 crhold.9 \ ucred.9 crsetgroups.9 \ ucred.9 cru2x.9 MLINKS+=uidinfo.9 uifind.9 \ uidinfo.9 uifree.9 \ uidinfo.9 uihashinit.9 \ uidinfo.9 uihold.9 MLINKS+=uio.9 uiomove.9 \ uio.9 uiomove_frombuf.9 \ uio.9 uiomove_nofault.9 .if ${MK_USB} != "no" MAN+= usbdi.9 MLINKS+=usbdi.9 usbd_do_request.9 \ usbdi.9 usbd_do_request_flags.9 \ usbdi.9 usbd_errstr.9 \ usbdi.9 usbd_lookup_id_by_info.9 \ usbdi.9 usbd_lookup_id_by_uaa.9 \ usbdi.9 usbd_transfer_clear_stall.9 \ usbdi.9 usbd_transfer_drain.9 \ usbdi.9 usbd_transfer_pending.9 \ usbdi.9 usbd_transfer_poll.9 \ usbdi.9 usbd_transfer_setup.9 \ usbdi.9 usbd_transfer_start.9 \ usbdi.9 usbd_transfer_stop.9 \ usbdi.9 usbd_transfer_submit.9 \ usbdi.9 usbd_transfer_unsetup.9 \ usbdi.9 usbd_xfer_clr_flag.9 \ usbdi.9 usbd_xfer_frame_data.9 \ usbdi.9 usbd_xfer_frame_len.9 \ usbdi.9 usbd_xfer_get_frame.9 \ usbdi.9 usbd_xfer_get_priv.9 \ usbdi.9 usbd_xfer_is_stalled.9 \ usbdi.9 usbd_xfer_max_framelen.9 \ usbdi.9 usbd_xfer_max_frames.9 \ usbdi.9 usbd_xfer_max_len.9 \ usbdi.9 usbd_xfer_set_flag.9 \ usbdi.9 usbd_xfer_set_frame_data.9 \ usbdi.9 usbd_xfer_set_frame_len.9 \ usbdi.9 usbd_xfer_set_frame_offset.9 \ usbdi.9 usbd_xfer_set_frames.9 \ usbdi.9 usbd_xfer_set_interval.9 \ usbdi.9 usbd_xfer_set_priv.9 \ usbdi.9 usbd_xfer_set_stall.9 \ usbdi.9 usbd_xfer_set_timeout.9 \ usbdi.9 usbd_xfer_softc.9 \ usbdi.9 usbd_xfer_state.9 \ usbdi.9 usbd_xfer_status.9 \ usbdi.9 usb_fifo_alloc_buffer.9 \ usbdi.9 usb_fifo_attach.9 \ usbdi.9 usb_fifo_detach.9 \ usbdi.9 usb_fifo_free_buffer.9 \ usbdi.9 usb_fifo_get_data.9 \ usbdi.9 usb_fifo_get_data_buffer.9 \ usbdi.9 usb_fifo_get_data_error.9 \ usbdi.9 usb_fifo_get_data_linear.9 \ usbdi.9 usb_fifo_put_bytes_max.9 \ usbdi.9 usb_fifo_put_data.9 \ usbdi.9 usb_fifo_put_data_buffer.9 \ usbdi.9 usb_fifo_put_data_error.9 
\ usbdi.9 usb_fifo_put_data_linear.9 \ usbdi.9 usb_fifo_reset.9 \ usbdi.9 usb_fifo_softc.9 \ usbdi.9 usb_fifo_wakeup.9 .endif MLINKS+=vcount.9 count_dev.9 MLINKS+=vfsconf.9 vfs_modevent.9 \ vfsconf.9 vfs_register.9 \ vfsconf.9 vfs_unregister.9 MLINKS+=vfs_getopt.9 vfs_copyopt.9 \ vfs_getopt.9 vfs_filteropt.9 \ vfs_getopt.9 vfs_flagopt.9 \ vfs_getopt.9 vfs_getopts.9 \ vfs_getopt.9 vfs_scanopt.9 \ vfs_getopt.9 vfs_setopt.9 \ vfs_getopt.9 vfs_setopt_part.9 \ vfs_getopt.9 vfs_setopts.9 MLINKS+=vhold.9 vdrop.9 \ vhold.9 vdropl.9 \ vhold.9 vholdl.9 MLINKS+=vmem.9 vmem_add.9 \ vmem.9 vmem_alloc.9 \ vmem.9 vmem_create.9 \ vmem.9 vmem_destroy.9 \ vmem.9 vmem_free.9 \ vmem.9 vmem_xalloc.9 \ vmem.9 vmem_xfree.9 MLINKS+=vm_map_lock.9 vm_map_lock_downgrade.9 \ vm_map_lock.9 vm_map_lock_read.9 \ vm_map_lock.9 vm_map_lock_upgrade.9 \ vm_map_lock.9 vm_map_trylock.9 \ vm_map_lock.9 vm_map_trylock_read.9 \ vm_map_lock.9 vm_map_unlock.9 \ vm_map_lock.9 vm_map_unlock_read.9 MLINKS+=vm_map_lookup.9 vm_map_lookup_done.9 MLINKS+=vm_map_max.9 vm_map_min.9 \ vm_map_max.9 vm_map_pmap.9 MLINKS+=vm_map_stack.9 vm_map_growstack.9 MLINKS+=vm_map_wire.9 vm_map_unwire.9 MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \ vm_page_bits.9 vm_page_dirty.9 \ vm_page_bits.9 vm_page_is_valid.9 \ vm_page_bits.9 vm_page_set_invalid.9 \ vm_page_bits.9 vm_page_set_validclean.9 \ vm_page_bits.9 vm_page_test_dirty.9 \ vm_page_bits.9 vm_page_undirty.9 \ vm_page_bits.9 vm_page_zero_invalid.9 MLINKS+=vm_page_busy.9 vm_page_busied.9 \ vm_page_busy.9 vm_page_busy_downgrade.9 \ vm_page_busy.9 vm_page_busy_sleep.9 \ vm_page_busy.9 vm_page_sbusied.9 \ vm_page_busy.9 vm_page_sbusy.9 \ vm_page_busy.9 vm_page_sleep_if_busy.9 \ vm_page_busy.9 vm_page_sunbusy.9 \ vm_page_busy.9 vm_page_trysbusy.9 \ vm_page_busy.9 vm_page_tryxbusy.9 \ vm_page_busy.9 vm_page_xbusied.9 \ vm_page_busy.9 vm_page_xbusy.9 \ vm_page_busy.9 vm_page_xunbusy.9 \ vm_page_busy.9 vm_page_assert_sbusied.9 \ vm_page_busy.9 vm_page_assert_unbusied.9 \ vm_page_busy.9 vm_page_assert_xbusied.9 MLINKS+=vm_page_aflag.9 vm_page_aflag_clear.9 \ vm_page_aflag.9 vm_page_aflag_set.9 \ vm_page_aflag.9 vm_page_reference.9 MLINKS+=vm_page_free.9 vm_page_free_toq.9 \ vm_page_free.9 vm_page_free_zero.9 \ vm_page_free.9 vm_page_try_to_free.9 MLINKS+=vm_page_insert.9 vm_page_remove.9 MLINKS+=vm_page_wire.9 vm_page_unwire.9 MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9 MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \ VOP_ATTRIB.9 VOP_SETATTR.9 MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \ VOP_CREATE.9 VOP_MKNOD.9 \ VOP_CREATE.9 VOP_SYMLINK.9 MLINKS+=VOP_FSYNC.9 VOP_FDATASYNC.9 MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9 MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9 MLINKS+=VOP_LOCK.9 vn_lock.9 \ VOP_LOCK.9 VOP_ISLOCKED.9 \ VOP_LOCK.9 VOP_UNLOCK.9 MLINKS+=VOP_OPENCLOSE.9 VOP_CLOSE.9 \ VOP_OPENCLOSE.9 VOP_OPEN.9 MLINKS+=VOP_RDWR.9 VOP_READ.9 \ VOP_RDWR.9 VOP_WRITE.9 MLINKS+=VOP_REMOVE.9 VOP_RMDIR.9 MLINKS+=vnet.9 vimage.9 MLINKS+=vref.9 VREF.9 \ vref.9 vrefl.9 MLINKS+=vrele.9 vput.9 \ vrele.9 vunref.9 MLINKS+=vslock.9 vsunlock.9 MLINKS+=zone.9 uma.9 \ zone.9 uma_zalloc.9 \ zone.9 uma_zalloc_arg.9 \ zone.9 uma_zalloc_domain.9 \ zone.9 uma_zcreate.9 \ zone.9 uma_zdestroy.9 \ zone.9 uma_zfree.9 \ zone.9 uma_zfree_arg.9 \ zone.9 uma_zfree_domain.9 \ zone.9 uma_zone_get_cur.9 \ zone.9 uma_zone_get_max.9 \ zone.9 uma_zone_set_max.9 \ zone.9 uma_zone_set_warning.9 \ zone.9 uma_zone_set_maxaction.9 .include Index: head/share/man/man9/g_bio.9 =================================================================== --- head/share/man/man9/g_bio.9 (revision 350693) 
+++ head/share/man/man9/g_bio.9 (revision 350694) @@ -1,306 +1,328 @@ .\" .\" Copyright (c) 2004-2006 Pawel Jakub Dawidek .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd Mar 7, 2018 +.Dd August 7, 2019 .Dt G_BIO 9 .Os .Sh NAME .Nm g_new_bio , .Nm g_clone_bio , .Nm g_destroy_bio , +.Nm g_format_bio , .Nm g_print_bio , .Nm g_reset_bio .Nd "GEOM bio controlling functions" .Sh SYNOPSIS .In sys/bio.h .In geom/geom.h .Ft "struct bio *" .Fn g_new_bio void .Ft "struct bio *" .Fn g_alloc_bio void .Ft "struct bio *" .Fn g_clone_bio "struct bio *bp" .Ft "struct bio *" .Fn g_duplicate_bio "struct bio *bp" .Ft void .Fn g_destroy_bio "struct bio *bp" .Ft void -.Fn g_print_bio "struct bio *bp" +.Fn g_format_bio "struct sbuf *sb" "const struct bio *bp" .Ft void +.Fo g_print_bio +.Fa "const char *prefix" "const struct bio *bp" +.Fa "const char *fmtsuffix" ... +.Fc +.Ft void .Fn g_reset_bio "struct bio *bp" .Sh DESCRIPTION A .Vt "struct bio" is used by GEOM to describe I/O requests; its most important fields are described below: .Bl -tag -width ".Va bio_attribute" .It Va bio_cmd I/O request command. There are five I/O requests available in GEOM: .Bl -tag -width ".Dv BIO_GETATTR" .It Dv BIO_READ A read request. .It Dv BIO_WRITE A write request. .It Dv BIO_DELETE Indicates that a certain range of data is no longer used and that it can be erased or freed as the underlying technology supports. Technologies like flash adaptation layers can arrange to erase the relevant blocks before they will become reassigned and cryptographic devices may want to fill random bits into the range to reduce the amount of data available for attack. .It Dv BIO_GETATTR Inspect and manipulate out-of-band attributes on a particular provider or path. Attributes are named by ascii strings and are stored in the .Va bio_attribute field. .It Dv BIO_FLUSH Tells underlying providers to flush their write caches. .El .It Va bio_flags Available flags: .Bl -tag -width ".Dv BIO_ERROR" .It Dv BIO_ERROR Request failed (error value is stored in .Va bio_error field). .It Dv BIO_DONE Request finished. .El .It Va bio_cflags Private use by the consumer. .It Va bio_pflags Private use by the provider. .It Va bio_offset Offset into provider. .It Va bio_data Pointer to data buffer.
.It Va bio_error Error value when .Dv BIO_ERROR is set. .It Va bio_done Pointer to function which will be called when the request is finished. .It Va bio_driver1 Private use by the provider. .It Va bio_driver2 Private use by the provider. .It Va bio_caller1 Private use by the consumer. .It Va bio_caller2 Private use by the consumer. .It Va bio_attribute Attribute string for .Dv BIO_GETATTR request. .It Va bio_from Consumer to use for request (attached to provider stored in .Va bio_to field) (typically read-only for a class). .It Va bio_to Destination provider (typically read-only for a class). .It Va bio_length Request length in bytes. .It Va bio_completed Number of bytes completed, but they may not be completed from the front of the request. .It Va bio_children Number of .Vt bio clones (typically read-only for a class). .It Va bio_inbed Number of finished .Vt bio clones. .It Va bio_parent Pointer to parent .Vt bio . .El .Pp The .Fn g_new_bio function allocates a new, empty .Vt bio structure. .Pp .Fn g_alloc_bio - same as .Fn g_new_bio , but always succeeds (allocates bio with the .Dv M_WAITOK malloc flag). .Pp The .Fn g_clone_bio function allocates a new .Vt bio structure and copies the following fields from the .Vt bio given as an argument to clone: .Va bio_cmd , .Va bio_length , .Va bio_offset , .Va bio_data , .Va bio_attribute . The field .Va bio_parent in the clone points to the passed .Vt bio and the field .Va bio_children in the passed .Vt bio is incremented. .Pp This function should be used for every request which enters through the provider of a particular geom and needs to be scheduled down. Proper order is: .Pp .Bl -enum -compact .It Clone the received .Vt "struct bio" . .It Modify the clone. .It Schedule the clone on its own consumer. .El .Pp .Fn g_duplicate_bio - same as .Fn g_clone_bio , but always succeeds (allocates bio with the .Dv M_WAITOK malloc flag). .Pp The .Fn g_destroy_bio function deallocates and destroys the given .Vt bio structure. .Pp The -.Fn g_print_bio +.Fn g_format_bio function prints information about the given .Vt bio -structure (for debugging purposes). +structure into the provided +.Vt sbuf . .Pp The +.Fn g_print_bio +function is a convenience wrapper around +.Fn g_format_bio +that can be used for debugging purposes. +It prints a provided +.Fa prefix +string, followed by the formatted +.Vt bio , +followed by a +.Fa fmtsuffix +in the style of +.Xr printf 9 . +The prefix and suffix strings may each be the empty string. +.Fn g_print_bio +always prints a newline character at the end of the line. +.Pp +The .Fn g_reset_bio function resets the given .Vt bio structure back to its initial state. .Fn g_reset_bio preserves internal data structures, while setting all user visible fields to their initial values. When reusing a .Vt bio obtained from .Fn g_new_bio , .Fn g_alloc_bio , .Fn g_clone_bio , or .Fn g_duplicate_bio for multiple transactions, .Fn g_reset_bio must be called between the transactions in lieu of .Fn bzero . While not strictly required for a .Vt bio structure created by other means, .Fn g_reset_bio should be used to initialize it and between transactions. .Sh RETURN VALUES The .Fn g_new_bio and .Fn g_clone_bio functions return a pointer to the allocated .Vt bio , or .Dv NULL if an error occurred. .Sh EXAMPLES Implementation of .Dq Dv NULL Ns -transformation , meaning that an I/O request is cloned and scheduled down without any modifications.
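.Pp
Before the full example, here is a minimal sketch of rendering a request into an
.Vt sbuf
with
.Fn g_format_bio
for a driver's own logging.
This is a hypothetical helper, not taken from the GEOM sources; it assumes the
buffer is managed with the auto-extending
.Xr sbuf 9
routines, whose allocation may fail:
.Bd -literal -offset indent
static void
example_log_bio(struct bio *bp)
{
	struct sbuf *sb;

	/* Hypothetical helper; assumes sbuf(9) auto-extending buffers. */
	sb = sbuf_new_auto();
	if (sb == NULL)
		return;
	sbuf_cat(sb, "Request received: ");
	g_format_bio(sb, bp);
	if (sbuf_finish(sb) == 0)
		printf("%s\\n", sbuf_data(sb));
	sbuf_delete(sb);
}
.Ed
.Pp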
Let us assume that field .Va ex_consumer in structure .Vt example_softc contains a consumer attached to the provider we want to operate on. .Bd -literal -offset indent void example_start(struct bio *bp) { struct example_softc *sc; struct bio *cbp; - printf("Request received: "); - g_print_bio(bp); - printf("\\n"); + g_print_bio("Request received: ", bp, ""); sc = bp->bio_to->geom->softc; if (sc == NULL) { g_io_deliver(bp, ENXIO); return; } /* Let's clone our bio request. */ cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; /* Standard 'done' function. */ /* Ok, schedule it down. */ /* * The consumer can be obtained from * LIST_FIRST(&bp->bio_to->geom->consumer) as well, * if there is only one in our geom. */ g_io_request(cbp, sc->ex_consumer); } .Ed .Sh SEE ALSO .Xr geom 4 , .Xr DECLARE_GEOM_CLASS 9 , .Xr g_access 9 , .Xr g_attach 9 , .Xr g_consumer 9 , .Xr g_data 9 , .Xr g_event 9 , .Xr g_geom 9 , .Xr g_provider 9 , .Xr g_provider_by_name 9 , .Xr g_wither_geom 9 .Sh AUTHORS .An -nosplit This manual page was written by .An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org . Index: head/sys/dev/fdc/fdc.c =================================================================== --- head/sys/dev/fdc/fdc.c (revision 350693) +++ head/sys/dev/fdc/fdc.c (revision 350694) @@ -1,2106 +1,2104 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Poul-Henning Kamp * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Don Ahn. * * Libretto PCMCIA floppy support by David Horwitt (dhorwitt@ucsd.edu) * aided by the Linux floppy driver modifications from David Bateman * (dbateman@eng.uts.edu.au). * * Copyright (c) 1993, 1994 by * jc@irbs.UUCP (John Capo) * vak@zebub.msk.su (Serge Vakulenko) * ache@astral.msk.su (Andrew A. Chernov) * * Copyright (c) 1993, 1994, 1995 by * joerg_wunsch@uriah.sax.de (Joerg Wunsch) * dufault@hda.com (Peter Dufault) * * Copyright (c) 2001 Joerg Wunsch, * joerg_wunsch@uriah.heep.sax.de (Joerg Wunsch) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)fd.c 7.4 (Berkeley) 5/25/91 * */ #include __FBSDID("$FreeBSD$"); #include "opt_fdc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Runtime configuration hints/flags */ /* configuration flags for fd */ #define FD_TYPEMASK 0x0f /* drive type, matches enum * fd_drivetype; on i386 machines, if * given as 0, use RTC type for fd0 * and fd1 */ #define FD_NO_CHLINE 0x10 /* drive does not support changeline * aka. unit attention */ #define FD_NO_PROBE 0x20 /* don't probe drive (seek test), just * assume it is there */ /* * Things that could conceiveably considered parameters or tweakables */ /* * Maximal number of bytes in a cylinder. * This is used for ISADMA bouncebuffer allocation and sets the max * xfersize we support. * * 2.88M format has 2 x 36 x 512, allow for hacked up density. */ #define MAX_BYTES_PER_CYL (2 * 40 * 512) /* * Timeout value for the PIO loops to wait until the FDC main status * register matches our expectations (request for master, direction * bit). This is supposed to be a number of microseconds, although * timing might actually not be very accurate. * * Timeouts of 100 msec are believed to be required for some broken * (old) hardware. */ #define FDSTS_TIMEOUT 100000 /* * After this many errors, stop whining. Close will reset this count. */ #define FDC_ERRMAX 100 /* * AutoDensity search lists for each drive type. */ static struct fd_type fd_searchlist_360k[] = { { FDF_5_360 }, { 0 } }; static struct fd_type fd_searchlist_12m[] = { { FDF_5_1200 | FL_AUTO }, { FDF_5_400 | FL_AUTO }, { FDF_5_360 | FL_2STEP | FL_AUTO}, { 0 } }; static struct fd_type fd_searchlist_720k[] = { { FDF_3_720 }, { 0 } }; static struct fd_type fd_searchlist_144m[] = { { FDF_3_1440 | FL_AUTO}, { FDF_3_720 | FL_AUTO}, { 0 } }; static struct fd_type fd_searchlist_288m[] = { { FDF_3_1440 | FL_AUTO }, #if 0 { FDF_3_2880 | FL_AUTO }, /* XXX: probably doesn't work */ #endif { FDF_3_720 | FL_AUTO}, { 0 } }; /* * Order must match enum fd_drivetype in . */ static struct fd_type *fd_native_types[] = { NULL, /* FDT_NONE */ fd_searchlist_360k, /* FDT_360K */ fd_searchlist_12m, /* FDT_12M */ fd_searchlist_720k, /* FDT_720K */ fd_searchlist_144m, /* FDT_144M */ fd_searchlist_288m, /* FDT_288M_1 (mapped to FDT_288M) */ fd_searchlist_288m, /* FDT_288M */ }; /* * Internals start here */ /* registers */ #define FDOUT 2 /* Digital Output Register (W) */ #define FDO_FDSEL 0x03 /* floppy device select */ #define FDO_FRST 0x04 /* floppy controller reset */ #define FDO_FDMAEN 0x08 /* enable floppy DMA and Interrupt */ #define FDO_MOEN0 0x10 /* motor enable drive 0 */ #define FDO_MOEN1 0x20 /* motor enable drive 1 */ #define FDO_MOEN2 0x40 /* motor enable drive 2 */ #define FDO_MOEN3 0x80 /* motor enable drive 3 */ #define FDSTS 4 /* NEC 765 Main Status Register (R) */ #define FDDSR 4 /* Data Rate Select Register (W) */ #define FDDATA 5 /* NEC 765 Data Register (R/W) */ #define FDCTL 7 /* Control Register (W) */ /* * The YE-DATA PC Card floppies use PIO to read in the data rather * than DMA due to the wild variability of DMA for the PC Card * devices. DMA was deleted from the PC Card specification in version * 7.2 of the standard, but that post-dates the YE-DATA devices by many * years. 
* * In addition, if we cannot setup the DMA resources for the ISA * attachment, we'll use this same offset for data transfer. However, * that almost certainly won't work. * * For this mode, offset 0 and 1 must be used to setup the transfer * for this floppy. This is OK for PC Card YE Data devices, but for * ISA this is likely wrong. These registers are only available on * those systems that map them to the floppy drive. Newer systems do * not do this, and we should likely prohibit access to them (or * disallow NODMA to be set). */ #define FDBCDR 0 /* And 1 */ #define FD_YE_DATAPORT 6 /* Drive Data port */ #define FDI_DCHG 0x80 /* diskette has been changed */ /* requires drive and motor being selected */ /* is cleared by any step pulse to drive */ /* * We have three private BIO commands. */ #define BIO_PROBE BIO_CMD0 #define BIO_RDID BIO_CMD1 #define BIO_FMT BIO_CMD2 /* * Per drive structure (softc). */ struct fd_data { u_char *fd_ioptr; /* IO pointer */ u_int fd_iosize; /* Size of IO chunks */ u_int fd_iocount; /* Outstanding requests */ struct fdc_data *fdc; /* pointer to controller structure */ int fdsu; /* this units number on this controller */ enum fd_drivetype type; /* drive type */ struct fd_type *ft; /* pointer to current type descriptor */ struct fd_type fts; /* type descriptors */ int sectorsize; int flags; #define FD_WP (1<<0) /* Write protected */ #define FD_MOTOR (1<<1) /* motor should be on */ #define FD_MOTORWAIT (1<<2) /* motor should be on */ #define FD_EMPTY (1<<3) /* no media */ #define FD_NEWDISK (1<<4) /* media changed */ #define FD_ISADMA (1<<5) /* isa dma started */ int track; /* where we think the head is */ #define FD_NO_TRACK -2 int options; /* FDOPT_* */ struct callout toffhandle; struct g_geom *fd_geom; struct g_provider *fd_provider; device_t dev; struct bio_queue_head fd_bq; }; #define FD_NOT_VALID -2 static driver_intr_t fdc_intr; static driver_filter_t fdc_intr_fast; static void fdc_reset(struct fdc_data *); static int fd_probe_disk(struct fd_data *, int *); static SYSCTL_NODE(_debug, OID_AUTO, fdc, CTLFLAG_RW, 0, "fdc driver"); static int fifo_threshold = 8; SYSCTL_INT(_debug_fdc, OID_AUTO, fifo, CTLFLAG_RW, &fifo_threshold, 0, "FIFO threshold setting"); static int debugflags = 0; SYSCTL_INT(_debug_fdc, OID_AUTO, debugflags, CTLFLAG_RW, &debugflags, 0, "Debug flags"); static int retries = 10; SYSCTL_INT(_debug_fdc, OID_AUTO, retries, CTLFLAG_RW, &retries, 0, "Number of retries to attempt"); static int spec1 = NE7_SPEC_1(6, 240); SYSCTL_INT(_debug_fdc, OID_AUTO, spec1, CTLFLAG_RW, &spec1, 0, "Specification byte one (step-rate + head unload)"); static int spec2 = NE7_SPEC_2(16, 0); SYSCTL_INT(_debug_fdc, OID_AUTO, spec2, CTLFLAG_RW, &spec2, 0, "Specification byte two (head load time + no-dma)"); static int settle; SYSCTL_INT(_debug_fdc, OID_AUTO, settle, CTLFLAG_RW, &settle, 0, "Head settling time in sec/hz"); static void fdprinttype(struct fd_type *ft) { printf("(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,0x%x)", ft->sectrac, ft->secsize, ft->datalen, ft->gap, ft->tracks, ft->size, ft->trans, ft->heads, ft->f_gap, ft->f_inter, ft->offset_side2, ft->flags); } static void fdsettype(struct fd_data *fd, struct fd_type *ft) { fd->ft = ft; ft->size = ft->sectrac * ft->heads * ft->tracks; fd->sectorsize = 128 << fd->ft->secsize; } /* * Bus space handling (access to low-level IO). 
*/ static inline void fdregwr(struct fdc_data *fdc, int reg, uint8_t v) { bus_space_write_1(fdc->iot, fdc->ioh[reg], fdc->ioff[reg], v); } static inline uint8_t fdregrd(struct fdc_data *fdc, int reg) { return bus_space_read_1(fdc->iot, fdc->ioh[reg], fdc->ioff[reg]); } static void fdctl_wr(struct fdc_data *fdc, u_int8_t v) { fdregwr(fdc, FDCTL, v); } static void fdout_wr(struct fdc_data *fdc, u_int8_t v) { fdregwr(fdc, FDOUT, v); } static u_int8_t fdsts_rd(struct fdc_data *fdc) { return fdregrd(fdc, FDSTS); } static void fddsr_wr(struct fdc_data *fdc, u_int8_t v) { fdregwr(fdc, FDDSR, v); } static void fddata_wr(struct fdc_data *fdc, u_int8_t v) { fdregwr(fdc, FDDATA, v); } static u_int8_t fddata_rd(struct fdc_data *fdc) { return fdregrd(fdc, FDDATA); } static u_int8_t fdin_rd(struct fdc_data *fdc) { return fdregrd(fdc, FDCTL); } /* * Magic pseudo-DMA initialization for YE FDC. Sets count and * direction. */ static void fdbcdr_wr(struct fdc_data *fdc, int iswrite, uint16_t count) { fdregwr(fdc, FDBCDR, (count - 1) & 0xff); fdregwr(fdc, FDBCDR + 1, (iswrite ? 0x80 : 0) | (((count - 1) >> 8) & 0x7f)); } static int fdc_err(struct fdc_data *fdc, const char *s) { fdc->fdc_errs++; if (s) { if (fdc->fdc_errs < FDC_ERRMAX) device_printf(fdc->fdc_dev, "%s", s); else if (fdc->fdc_errs == FDC_ERRMAX) device_printf(fdc->fdc_dev, "too many errors, not " "logging any more\n"); } return (1); } /* * FDC IO functions, take care of the main status register, timeout * in case the desired status bits are never set. * * These PIO loops initially start out with short delays between * each iteration in the expectation that the required condition * is usually met quickly, so it can be handled immediately. */ static int fdc_in(struct fdc_data *fdc, int *ptr) { int i, j, step; step = 1; for (j = 0; j < FDSTS_TIMEOUT; j += step) { i = fdsts_rd(fdc) & (NE7_DIO | NE7_RQM); if (i == (NE7_DIO|NE7_RQM)) { i = fddata_rd(fdc); if (ptr) *ptr = i; return (0); } if (i == NE7_RQM) return (fdc_err(fdc, "ready for output in input\n")); step += step; DELAY(step); } return (fdc_err(fdc, bootverbose? "input ready timeout\n": 0)); } static int fdc_out(struct fdc_data *fdc, int x) { int i, j, step; step = 1; for (j = 0; j < FDSTS_TIMEOUT; j += step) { i = fdsts_rd(fdc) & (NE7_DIO | NE7_RQM); if (i == NE7_RQM) { fddata_wr(fdc, x); return (0); } if (i == (NE7_DIO|NE7_RQM)) return (fdc_err(fdc, "ready for input in output\n")); step += step; DELAY(step); } return (fdc_err(fdc, bootverbose? "output ready timeout\n": 0)); } /* * fdc_cmd: Send a command to the chip. * Takes a varargs with this structure: * # of output bytes * output bytes as int [...] * # of input bytes * input bytes as int* [...] */ static int fdc_cmd(struct fdc_data *fdc, int n_out, ...) 
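/*
 * For example (matching calls made elsewhere in this driver), a SENSE
 * DRIVE STATUS exchange writes two command bytes and reads back one
 * status byte:
 *
 *	fdc_cmd(fdc, 2, NE7CMD_SENSED, fdc->fd->fdsu, 1, &st3);
 *
 * while SENSE INTERRUPT STATUS writes one byte and reads one back:
 *
 *	fdc_cmd(fdc, 1, NE7CMD_SENSEI, 1, &st0);
 */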
{ u_char cmd = 0; int n_in; int n, i; va_list ap; va_start(ap, n_out); for (n = 0; n < n_out; n++) { i = va_arg(ap, int); if (n == 0) cmd = i; if (fdc_out(fdc, i) < 0) { char msg[50]; snprintf(msg, sizeof(msg), "cmd %x failed at out byte %d of %d\n", cmd, n + 1, n_out); fdc->flags |= FDC_NEEDS_RESET; va_end(ap); return fdc_err(fdc, msg); } } n_in = va_arg(ap, int); for (n = 0; n < n_in; n++) { int *ptr = va_arg(ap, int *); if (fdc_in(fdc, ptr) < 0) { char msg[50]; snprintf(msg, sizeof(msg), "cmd %02x failed at in byte %d of %d\n", cmd, n + 1, n_in); fdc->flags |= FDC_NEEDS_RESET; va_end(ap); return fdc_err(fdc, msg); } } va_end(ap); return (0); } static void fdc_reset(struct fdc_data *fdc) { int i, r[10]; if (fdc->fdct == FDC_ENHANCED) { /* Try a software reset, default precomp, and 500 kb/s */ fddsr_wr(fdc, I8207X_DSR_SR); } else { /* Try a hardware reset, keep motor on */ fdout_wr(fdc, fdc->fdout & ~(FDO_FRST|FDO_FDMAEN)); DELAY(100); /* enable FDC, but defer interrupts a moment */ fdout_wr(fdc, fdc->fdout & ~FDO_FDMAEN); } DELAY(100); fdout_wr(fdc, fdc->fdout); /* XXX after a reset, silently believe the FDC will accept commands */ if (fdc_cmd(fdc, 3, NE7CMD_SPECIFY, spec1, spec2, 0)) device_printf(fdc->fdc_dev, " SPECIFY failed in reset\n"); if (fdc->fdct == FDC_ENHANCED) { if (fdc_cmd(fdc, 4, I8207X_CONFIG, 0, /* 0x40 | */ /* Enable Implied Seek - * breaks 2step! */ 0x10 | /* Polling disabled */ (fifo_threshold - 1), /* Fifo threshold */ 0x00, /* Precomp track */ 0)) device_printf(fdc->fdc_dev, " CONFIGURE failed in reset\n"); if (debugflags & 1) { if (fdc_cmd(fdc, 1, I8207X_DUMPREG, 10, &r[0], &r[1], &r[2], &r[3], &r[4], &r[5], &r[6], &r[7], &r[8], &r[9])) device_printf(fdc->fdc_dev, " DUMPREG failed in reset\n"); for (i = 0; i < 10; i++) printf(" %02x", r[i]); printf("\n"); } } } static int fdc_sense_drive(struct fdc_data *fdc, int *st3p) { int st3; if (fdc_cmd(fdc, 2, NE7CMD_SENSED, fdc->fd->fdsu, 1, &st3)) return (fdc_err(fdc, "Sense Drive Status failed\n")); if (st3p) *st3p = st3; return (0); } static int fdc_sense_int(struct fdc_data *fdc, int *st0p, int *cylp) { int cyl, st0, ret; ret = fdc_cmd(fdc, 1, NE7CMD_SENSEI, 1, &st0); if (ret) { (void)fdc_err(fdc, "sense intr err reading stat reg 0\n"); return (ret); } if (st0p) *st0p = st0; if ((st0 & NE7_ST0_IC) == NE7_ST0_IC_IV) { /* * There doesn't seem to have been an interrupt. 
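 * (SENSE INTERRUPT STATUS reports the invalid-command code in ST0 when
 * no interrupt is pending, hence the FD_NOT_VALID return below.)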
*/ return (FD_NOT_VALID); } if (fdc_in(fdc, &cyl) < 0) return fdc_err(fdc, "can't get cyl num\n"); if (cylp) *cylp = cyl; return (0); } static int fdc_read_status(struct fdc_data *fdc) { int i, ret, status; for (i = ret = 0; i < 7; i++) { ret = fdc_in(fdc, &status); fdc->status[i] = status; if (ret != 0) break; } if (ret == 0) fdc->flags |= FDC_STAT_VALID; else fdc->flags &= ~FDC_STAT_VALID; return ret; } /* * Select this drive */ static void fd_select(struct fd_data *fd) { struct fdc_data *fdc; /* XXX: lock controller */ fdc = fd->fdc; fdc->fdout &= ~FDO_FDSEL; fdc->fdout |= FDO_FDMAEN | FDO_FRST | fd->fdsu; fdout_wr(fdc, fdc->fdout); } static void fd_turnon(void *arg) { struct fd_data *fd; struct bio *bp; int once; fd = arg; mtx_assert(&fd->fdc->fdc_mtx, MA_OWNED); fd->flags &= ~FD_MOTORWAIT; fd->flags |= FD_MOTOR; once = 0; for (;;) { bp = bioq_takefirst(&fd->fd_bq); if (bp == NULL) break; bioq_disksort(&fd->fdc->head, bp); once = 1; } if (once) wakeup(&fd->fdc->head); } static void fd_motor(struct fd_data *fd, int turnon) { struct fdc_data *fdc; fdc = fd->fdc; /* mtx_assert(&fdc->fdc_mtx, MA_OWNED); */ if (turnon) { fd->flags |= FD_MOTORWAIT; fdc->fdout |= (FDO_MOEN0 << fd->fdsu); callout_reset(&fd->toffhandle, hz, fd_turnon, fd); } else { callout_stop(&fd->toffhandle); fd->flags &= ~(FD_MOTOR|FD_MOTORWAIT); fdc->fdout &= ~(FDO_MOEN0 << fd->fdsu); } fdout_wr(fdc, fdc->fdout); } static void fd_turnoff(void *xfd) { struct fd_data *fd = xfd; mtx_assert(&fd->fdc->fdc_mtx, MA_OWNED); fd_motor(fd, 0); } /* * fdc_intr - wake up the worker thread. */ static void fdc_intr(void *arg) { wakeup(arg); } static int fdc_intr_fast(void *arg) { wakeup(arg); return(FILTER_HANDLED); } /* * fdc_pio(): perform programmed IO read/write for YE PCMCIA floppy. */ static void fdc_pio(struct fdc_data *fdc) { u_char *cptr; struct bio *bp; u_int count; bp = fdc->bp; cptr = fdc->fd->fd_ioptr; count = fdc->fd->fd_iosize; if (bp->bio_cmd == BIO_READ) { fdbcdr_wr(fdc, 0, count); bus_space_read_multi_1(fdc->iot, fdc->ioh[FD_YE_DATAPORT], fdc->ioff[FD_YE_DATAPORT], cptr, count); } else { bus_space_write_multi_1(fdc->iot, fdc->ioh[FD_YE_DATAPORT], fdc->ioff[FD_YE_DATAPORT], cptr, count); fdbcdr_wr(fdc, 0, count); /* needed? */ } } static int fdc_biodone(struct fdc_data *fdc, int error) { struct fd_data *fd; struct bio *bp; fd = fdc->fd; bp = fdc->bp; mtx_lock(&fdc->fdc_mtx); if (--fd->fd_iocount == 0) callout_reset(&fd->toffhandle, 4 * hz, fd_turnoff, fd); fdc->bp = NULL; fdc->fd = NULL; mtx_unlock(&fdc->fdc_mtx); if (bp->bio_to != NULL) { if ((debugflags & 2) && fd->fdc->retry > 0) printf("retries: %d\n", fd->fdc->retry); g_io_deliver(bp, error); return (0); } bp->bio_error = error; bp->bio_flags |= BIO_DONE; wakeup(bp); return (0); } static int retry_line; static int fdc_worker(struct fdc_data *fdc) { struct fd_data *fd; struct bio *bp; int i, nsect; int st0, st3, cyl, mfm, steptrac, cylinder, descyl, sec; int head; int override_error; static int need_recal; struct fdc_readid *idp; struct fd_formb *finfo; override_error = 0; /* Have we exhausted our retries ? */ bp = fdc->bp; fd = fdc->fd; if (bp != NULL && (fdc->retry >= retries || (fd->options & FDOPT_NORETRY))) { if ((debugflags & 4)) printf("Too many retries (EIO)\n"); if (fdc->flags & FDC_NEEDS_RESET) { mtx_lock(&fdc->fdc_mtx); fd->flags |= FD_EMPTY; mtx_unlock(&fdc->fdc_mtx); } return (fdc_biodone(fdc, EIO)); } /* Disable ISADMA if we bailed while it was active */ if (fd != NULL && (fd->flags & FD_ISADMA)) { isa_dmadone( bp->bio_cmd == BIO_READ ? 
ISADMA_READ : ISADMA_WRITE, fd->fd_ioptr, fd->fd_iosize, fdc->dmachan); mtx_lock(&fdc->fdc_mtx); fd->flags &= ~FD_ISADMA; mtx_unlock(&fdc->fdc_mtx); } /* Unwedge the controller ? */ if (fdc->flags & FDC_NEEDS_RESET) { fdc->flags &= ~FDC_NEEDS_RESET; fdc_reset(fdc); if (cold) DELAY(1000000); else tsleep(fdc, PRIBIO, "fdcrst", hz); /* Discard results */ for (i = 0; i < 4; i++) fdc_sense_int(fdc, &st0, &cyl); /* All drives must recal */ need_recal = 0xf; } /* Pick up a request, if need be wait for it */ if (fdc->bp == NULL) { mtx_lock(&fdc->fdc_mtx); do { fdc->bp = bioq_takefirst(&fdc->head); if (fdc->bp == NULL) msleep(&fdc->head, &fdc->fdc_mtx, PRIBIO, "-", 0); } while (fdc->bp == NULL && (fdc->flags & FDC_KTHREAD_EXIT) == 0); mtx_unlock(&fdc->fdc_mtx); if (fdc->bp == NULL) /* * Nothing to do, worker thread has been * requested to stop. */ return (0); bp = fdc->bp; fd = fdc->fd = bp->bio_driver1; fdc->retry = 0; fd->fd_ioptr = bp->bio_data; if (bp->bio_cmd == BIO_FMT) { i = offsetof(struct fd_formb, fd_formb_cylno(0)); fd->fd_ioptr += i; fd->fd_iosize = bp->bio_length - i; } } /* Select drive, setup params */ fd_select(fd); if (fdc->fdct == FDC_ENHANCED) fddsr_wr(fdc, fd->ft->trans); else fdctl_wr(fdc, fd->ft->trans); if (bp->bio_cmd == BIO_PROBE) { if ((!(device_get_flags(fd->dev) & FD_NO_CHLINE) && !(fdin_rd(fdc) & FDI_DCHG) && !(fd->flags & FD_EMPTY)) || fd_probe_disk(fd, &need_recal) == 0) return (fdc_biodone(fdc, 0)); return (1); } /* * If we are dead just flush the requests */ if (fd->flags & FD_EMPTY) return (fdc_biodone(fdc, ENXIO)); /* Check if we lost our media */ if (fdin_rd(fdc) & FDI_DCHG) { if (debugflags & 0x40) printf("Lost disk\n"); mtx_lock(&fdc->fdc_mtx); fd->flags |= FD_EMPTY; fd->flags |= FD_NEWDISK; mtx_unlock(&fdc->fdc_mtx); g_topology_lock(); g_orphan_provider(fd->fd_provider, ENXIO); fd->fd_provider->flags |= G_PF_WITHER; fd->fd_provider = g_new_providerf(fd->fd_geom, "%s", fd->fd_geom->name); g_error_provider(fd->fd_provider, 0); g_topology_unlock(); return (fdc_biodone(fdc, ENXIO)); } /* Check if the floppy is write-protected */ if (bp->bio_cmd == BIO_FMT || bp->bio_cmd == BIO_WRITE) { retry_line = __LINE__; if(fdc_sense_drive(fdc, &st3) != 0) return (1); if(st3 & NE7_ST3_WP) return (fdc_biodone(fdc, EROFS)); } mfm = (fd->ft->flags & FL_MFM)? NE7CMD_MFM: 0; steptrac = (fd->ft->flags & FL_2STEP)? 
2: 1; i = fd->ft->sectrac * fd->ft->heads; cylinder = bp->bio_pblkno / i; descyl = cylinder * steptrac; sec = bp->bio_pblkno % i; nsect = i - sec; head = sec / fd->ft->sectrac; sec = sec % fd->ft->sectrac + 1; /* If everything is going swimmingly, use multisector xfer */ if (fdc->retry == 0 && (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { fd->fd_iosize = imin(nsect * fd->sectorsize, bp->bio_resid); nsect = fd->fd_iosize / fd->sectorsize; } else if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { fd->fd_iosize = fd->sectorsize; nsect = 1; } /* Do RECAL if we need to or are going to track zero anyway */ if ((need_recal & (1 << fd->fdsu)) || (cylinder == 0 && fd->track != 0) || fdc->retry > 2) { retry_line = __LINE__; if (fdc_cmd(fdc, 2, NE7CMD_RECAL, fd->fdsu, 0)) return (1); tsleep(fdc, PRIBIO, "fdrecal", hz); retry_line = __LINE__; if (fdc_sense_int(fdc, &st0, &cyl) == FD_NOT_VALID) return (1); /* XXX */ retry_line = __LINE__; if ((st0 & 0xc0) || cyl != 0) return (1); need_recal &= ~(1 << fd->fdsu); fd->track = 0; /* let the heads settle */ if (settle) tsleep(fdc->fd, PRIBIO, "fdhdstl", settle); } /* * SEEK to where we want to be */ if (cylinder != fd->track) { retry_line = __LINE__; if (fdc_cmd(fdc, 3, NE7CMD_SEEK, fd->fdsu, descyl, 0)) return (1); tsleep(fdc, PRIBIO, "fdseek", hz); retry_line = __LINE__; if (fdc_sense_int(fdc, &st0, &cyl) == FD_NOT_VALID) return (1); /* XXX */ retry_line = __LINE__; if ((st0 & 0xc0) || cyl != descyl) { need_recal |= (1 << fd->fdsu); return (1); } /* let the heads settle */ if (settle) tsleep(fdc->fd, PRIBIO, "fdhdstl", settle); } fd->track = cylinder; if (debugflags & 8) printf("op %x bn %ju siz %u ptr %p retry %d\n", bp->bio_cmd, bp->bio_pblkno, fd->fd_iosize, fd->fd_ioptr, fdc->retry); /* Setup ISADMA if we need it and have it */ if ((bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FMT) && !(fdc->flags & FDC_NODMA)) { isa_dmastart( bp->bio_cmd == BIO_READ ? 
ISADMA_READ : ISADMA_WRITE, fd->fd_ioptr, fd->fd_iosize, fdc->dmachan); mtx_lock(&fdc->fdc_mtx); fd->flags |= FD_ISADMA; mtx_unlock(&fdc->fdc_mtx); } /* Do PIO if we have to */ if (fdc->flags & FDC_NODMA) { if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FMT) fdbcdr_wr(fdc, 1, fd->fd_iosize); if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FMT) fdc_pio(fdc); } switch(bp->bio_cmd) { case BIO_FMT: /* formatting */ finfo = (struct fd_formb *)bp->bio_data; retry_line = __LINE__; if (fdc_cmd(fdc, 6, NE7CMD_FORMAT | mfm, head << 2 | fd->fdsu, finfo->fd_formb_secshift, finfo->fd_formb_nsecs, finfo->fd_formb_gaplen, finfo->fd_formb_fillbyte, 0)) return (1); break; case BIO_RDID: retry_line = __LINE__; if (fdc_cmd(fdc, 2, NE7CMD_READID | mfm, head << 2 | fd->fdsu, 0)) return (1); break; case BIO_READ: retry_line = __LINE__; if (fdc_cmd(fdc, 9, NE7CMD_READ | NE7CMD_SK | mfm | NE7CMD_MT, head << 2 | fd->fdsu, /* head & unit */ fd->track, /* track */ head, /* head */ sec, /* sector + 1 */ fd->ft->secsize, /* sector size */ fd->ft->sectrac, /* sectors/track */ fd->ft->gap, /* gap size */ fd->ft->datalen, /* data length */ 0)) return (1); break; case BIO_WRITE: retry_line = __LINE__; if (fdc_cmd(fdc, 9, NE7CMD_WRITE | mfm | NE7CMD_MT, head << 2 | fd->fdsu, /* head & unit */ fd->track, /* track */ head, /* head */ sec, /* sector + 1 */ fd->ft->secsize, /* sector size */ fd->ft->sectrac, /* sectors/track */ fd->ft->gap, /* gap size */ fd->ft->datalen, /* data length */ 0)) return (1); break; default: KASSERT(0 == 1, ("Wrong bio_cmd %x\n", bp->bio_cmd)); } /* Wait for interrupt */ i = tsleep(fdc, PRIBIO, "fddata", hz); /* PIO if the read looks good */ if (i == 0 && (fdc->flags & FDC_NODMA) && (bp->bio_cmd == BIO_READ)) fdc_pio(fdc); /* Finish DMA */ if (fd->flags & FD_ISADMA) { isa_dmadone( bp->bio_cmd == BIO_READ ? ISADMA_READ : ISADMA_WRITE, fd->fd_ioptr, fd->fd_iosize, fdc->dmachan); mtx_lock(&fdc->fdc_mtx); fd->flags &= ~FD_ISADMA; mtx_unlock(&fdc->fdc_mtx); } if (i != 0) { /* * Timeout. * * Due to IBM's brain-dead design, the FDC has a faked ready * signal, hardwired to ready == true. Thus, any command * issued if there's no diskette in the drive will _never_ * complete, and must be aborted by resetting the FDC. * Many thanks, Big Blue! */ retry_line = __LINE__; fdc->flags |= FDC_NEEDS_RESET; return (1); } retry_line = __LINE__; if (fdc_read_status(fdc)) return (1); if (debugflags & 0x10) printf(" -> %x %x %x %x\n", fdc->status[0], fdc->status[1], fdc->status[2], fdc->status[3]); st0 = fdc->status[0] & NE7_ST0_IC; if (st0 != 0) { retry_line = __LINE__; if (st0 == NE7_ST0_IC_AT && fdc->status[1] & NE7_ST1_OR) { /* * DMA overrun. Someone hogged the bus and * didn't release it in time for the next * FDC transfer. 
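 * Returning 1 lets the worker thread count a retry and reissue
 * the transfer.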
*/ return (1); } retry_line = __LINE__; if(st0 == NE7_ST0_IC_IV) { fdc->flags |= FDC_NEEDS_RESET; return (1); } retry_line = __LINE__; if(st0 == NE7_ST0_IC_AT && fdc->status[2] & NE7_ST2_WC) { need_recal |= (1 << fd->fdsu); return (1); } if (debugflags & 0x20) { printf("status %02x %02x %02x %02x %02x %02x\n", fdc->status[0], fdc->status[1], fdc->status[2], fdc->status[3], fdc->status[4], fdc->status[5]); } retry_line = __LINE__; if (fd->options & FDOPT_NOERROR) override_error = 1; else return (1); } /* All OK */ switch(bp->bio_cmd) { case BIO_RDID: /* copy out ID field contents */ idp = (struct fdc_readid *)bp->bio_data; idp->cyl = fdc->status[3]; idp->head = fdc->status[4]; idp->sec = fdc->status[5]; idp->secshift = fdc->status[6]; if (debugflags & 0x40) printf("c %d h %d s %d z %d\n", idp->cyl, idp->head, idp->sec, idp->secshift); break; case BIO_READ: case BIO_WRITE: bp->bio_pblkno += nsect; bp->bio_resid -= fd->fd_iosize; bp->bio_completed += fd->fd_iosize; fd->fd_ioptr += fd->fd_iosize; if (override_error) { if ((debugflags & 4)) printf("FDOPT_NOERROR: returning bad data\n"); } else { /* Since we managed to get something done, * reset the retry */ fdc->retry = 0; if (bp->bio_resid > 0) return (0); } break; case BIO_FMT: break; } return (fdc_biodone(fdc, 0)); } static void fdc_thread(void *arg) { struct fdc_data *fdc; fdc = arg; int i; mtx_lock(&fdc->fdc_mtx); fdc->flags |= FDC_KTHREAD_ALIVE; while ((fdc->flags & FDC_KTHREAD_EXIT) == 0) { mtx_unlock(&fdc->fdc_mtx); i = fdc_worker(fdc); if (i && debugflags & 0x20) { - if (fdc->bp != NULL) { - g_print_bio(fdc->bp); - printf("\n"); - } + if (fdc->bp != NULL) + g_print_bio("", fdc->bp, ""); printf("Retry line %d\n", retry_line); } fdc->retry += i; mtx_lock(&fdc->fdc_mtx); } fdc->flags &= ~(FDC_KTHREAD_EXIT | FDC_KTHREAD_ALIVE); mtx_unlock(&fdc->fdc_mtx); kproc_exit(0); } /* * Enqueue a request. */ static void fd_enqueue(struct fd_data *fd, struct bio *bp) { struct fdc_data *fdc; int call; call = 0; fdc = fd->fdc; mtx_lock(&fdc->fdc_mtx); /* If we go from idle, cancel motor turnoff */ if (fd->fd_iocount++ == 0) callout_stop(&fd->toffhandle); if (fd->flags & FD_MOTOR) { /* The motor is on, send it directly to the controller */ bioq_disksort(&fdc->head, bp); wakeup(&fdc->head); } else { /* Queue it on the drive until the motor has started */ bioq_insert_tail(&fd->fd_bq, bp); if (!(fd->flags & FD_MOTORWAIT)) fd_motor(fd, 1); } mtx_unlock(&fdc->fdc_mtx); } /* * Try to find out if we have a disk in the drive. */ static int fd_probe_disk(struct fd_data *fd, int *recal) { struct fdc_data *fdc; int st0, st3, cyl; int oopts, ret; fdc = fd->fdc; oopts = fd->options; fd->options |= FDOPT_NOERRLOG | FDOPT_NORETRY; ret = 1; /* * First recal, then seek to cyl#1, this clears the old condition on * the disk change line so we can examine it for current status. 
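 * (The disk change line stays latched until the drive sees a step
 * pulse; see the FDI_DCHG definition above.)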
*/ if (debugflags & 0x40) printf("New disk in probe\n"); mtx_lock(&fdc->fdc_mtx); fd->flags |= FD_NEWDISK; mtx_unlock(&fdc->fdc_mtx); if (fdc_cmd(fdc, 2, NE7CMD_RECAL, fd->fdsu, 0)) goto done; tsleep(fdc, PRIBIO, "fdrecal", hz); if (fdc_sense_int(fdc, &st0, &cyl) == FD_NOT_VALID) goto done; /* XXX */ if ((st0 & 0xc0) || cyl != 0) goto done; /* Seek to track 1 */ if (fdc_cmd(fdc, 3, NE7CMD_SEEK, fd->fdsu, 1, 0)) goto done; tsleep(fdc, PRIBIO, "fdseek", hz); if (fdc_sense_int(fdc, &st0, &cyl) == FD_NOT_VALID) goto done; /* XXX */ *recal |= (1 << fd->fdsu); if (fdin_rd(fdc) & FDI_DCHG) { if (debugflags & 0x40) printf("Empty in probe\n"); mtx_lock(&fdc->fdc_mtx); fd->flags |= FD_EMPTY; mtx_unlock(&fdc->fdc_mtx); } else { if (fdc_sense_drive(fdc, &st3) != 0) goto done; if (debugflags & 0x40) printf("Got disk in probe\n"); mtx_lock(&fdc->fdc_mtx); fd->flags &= ~FD_EMPTY; if (st3 & NE7_ST3_WP) fd->flags |= FD_WP; else fd->flags &= ~FD_WP; mtx_unlock(&fdc->fdc_mtx); } ret = 0; done: fd->options = oopts; return (ret); } static int fdmisccmd(struct fd_data *fd, u_int cmd, void *data) { struct bio *bp; struct fd_formb *finfo; struct fdc_readid *idfield; int error; bp = malloc(sizeof(struct bio), M_TEMP, M_WAITOK | M_ZERO); /* * Set up a bio request for fdstrategy(). bio_offset is faked * so that fdstrategy() will seek to the requested * cylinder, and use the desired head. */ bp->bio_cmd = cmd; if (cmd == BIO_FMT) { finfo = (struct fd_formb *)data; bp->bio_pblkno = (finfo->cyl * fd->ft->heads + finfo->head) * fd->ft->sectrac; bp->bio_length = sizeof *finfo; } else if (cmd == BIO_RDID) { idfield = (struct fdc_readid *)data; bp->bio_pblkno = (idfield->cyl * fd->ft->heads + idfield->head) * fd->ft->sectrac; bp->bio_length = sizeof(struct fdc_readid); } else if (cmd == BIO_PROBE) { /* nothing */ } else panic("wrong cmd in fdmisccmd()"); bp->bio_offset = bp->bio_pblkno * fd->sectorsize; bp->bio_data = data; bp->bio_driver1 = fd; bp->bio_flags = 0; fd_enqueue(fd, bp); do { tsleep(bp, PRIBIO, "fdwait", hz); } while (!(bp->bio_flags & BIO_DONE)); error = bp->bio_error; free(bp, M_TEMP); return (error); } /* * Try figuring out the density of the media present in our device. */ static int fdautoselect(struct fd_data *fd) { struct fd_type *fdtp; struct fdc_readid id; int oopts, rv; if (!(fd->ft->flags & FL_AUTO)) return (0); fdtp = fd_native_types[fd->type]; fdsettype(fd, fdtp); if (!(fd->ft->flags & FL_AUTO)) return (0); /* * Try reading sector ID fields, first at cylinder 0, head 0, * then at cylinder 2, head N. We don't probe cylinder 1, * since for 5.25in DD media in a HD drive, there are no data * to read (2 step pulses per media cylinder required). For * two-sided media, the second probe always goes to head 1, so * we can tell them apart from single-sided media. As a * side-effect this means that single-sided media should be * mentioned in the search list after two-sided media of an * otherwise identical density. Media with a different number * of sectors per track but otherwise identical parameters * cannot be distinguished at all. * * If we successfully read an ID field on both cylinders where * the recorded values match our expectation, we are done. * Otherwise, we try the next density entry from the table. * * Stepping to cylinder 2 has the side-effect of clearing the * unit attention bit. 
*/ oopts = fd->options; fd->options |= FDOPT_NOERRLOG | FDOPT_NORETRY; for (; fdtp->heads; fdtp++) { fdsettype(fd, fdtp); id.cyl = id.head = 0; rv = fdmisccmd(fd, BIO_RDID, &id); if (rv != 0) continue; if (id.cyl != 0 || id.head != 0 || id.secshift != fdtp->secsize) continue; id.cyl = 2; id.head = fd->ft->heads - 1; rv = fdmisccmd(fd, BIO_RDID, &id); if (id.cyl != 2 || id.head != fdtp->heads - 1 || id.secshift != fdtp->secsize) continue; if (rv == 0) break; } fd->options = oopts; if (fdtp->heads == 0) { if (debugflags & 0x40) device_printf(fd->dev, "autoselection failed\n"); fdsettype(fd, fd_native_types[fd->type]); return (-1); } else { if (debugflags & 0x40) { device_printf(fd->dev, "autoselected %d KB medium\n", fd->ft->size / 2); fdprinttype(fd->ft); } return (0); } } /* * GEOM class implementation */ static g_access_t fd_access; static g_start_t fd_start; static g_ioctl_t fd_ioctl; struct g_class g_fd_class = { .name = "FD", .version = G_VERSION, .start = fd_start, .access = fd_access, .ioctl = fd_ioctl, }; static int fd_access(struct g_provider *pp, int r, int w, int e) { struct fd_data *fd; struct fdc_data *fdc; int ar, aw, ae; int busy; fd = pp->geom->softc; fdc = fd->fdc; /* * If our provider is withering, we can only get negative requests * and we don't want to even see them */ if (pp->flags & G_PF_WITHER) return (0); ar = r + pp->acr; aw = w + pp->acw; ae = e + pp->ace; if (ar == 0 && aw == 0 && ae == 0) { fd->options &= ~(FDOPT_NORETRY | FDOPT_NOERRLOG | FDOPT_NOERROR); device_unbusy(fd->dev); return (0); } busy = 0; if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) { if (fdmisccmd(fd, BIO_PROBE, NULL)) return (ENXIO); if (fd->flags & FD_EMPTY) return (ENXIO); if (fd->flags & FD_NEWDISK) { if (fdautoselect(fd) != 0 && (device_get_flags(fd->dev) & FD_NO_CHLINE)) { mtx_lock(&fdc->fdc_mtx); fd->flags |= FD_EMPTY; mtx_unlock(&fdc->fdc_mtx); return (ENXIO); } mtx_lock(&fdc->fdc_mtx); fd->flags &= ~FD_NEWDISK; mtx_unlock(&fdc->fdc_mtx); } device_busy(fd->dev); busy = 1; } if (w > 0 && (fd->flags & FD_WP)) { if (busy) device_unbusy(fd->dev); return (EROFS); } pp->sectorsize = fd->sectorsize; pp->stripesize = fd->ft->heads * fd->ft->sectrac * fd->sectorsize; pp->mediasize = pp->stripesize * fd->ft->tracks; return (0); } static void fd_start(struct bio *bp) { struct fdc_data * fdc; struct fd_data * fd; fd = bp->bio_to->geom->softc; fdc = fd->fdc; bp->bio_driver1 = fd; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr_int(bp, "GEOM::fwsectors", fd->ft->sectrac)) return; if (g_handleattr_int(bp, "GEOM::fwheads", fd->ft->heads)) return; g_io_deliver(bp, ENOIOCTL); return; } if (!(bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { g_io_deliver(bp, EOPNOTSUPP); return; } bp->bio_pblkno = bp->bio_offset / fd->sectorsize; bp->bio_resid = bp->bio_length; fd_enqueue(fd, bp); return; } static int fd_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { struct fd_data *fd; struct fdc_status *fsp; struct fdc_readid *rid; int error; fd = pp->geom->softc; switch (cmd) { case FD_GTYPE: /* get drive type */ *(struct fd_type *)data = *fd->ft; return (0); case FD_STYPE: /* set drive type */ /* * Allow setting drive type temporarily iff * currently unset. Used for fdformat so any * user can set it, and then start formatting. 
*/ fd->fts = *(struct fd_type *)data; if (fd->fts.sectrac) { /* XXX: check for rubbish */ fdsettype(fd, &fd->fts); } else { fdsettype(fd, fd_native_types[fd->type]); } if (debugflags & 0x40) fdprinttype(fd->ft); return (0); case FD_GOPTS: /* get drive options */ *(int *)data = fd->options; return (0); case FD_SOPTS: /* set drive options */ fd->options = *(int *)data; return (0); case FD_CLRERR: error = priv_check(td, PRIV_DRIVER); if (error) return (error); fd->fdc->fdc_errs = 0; return (0); case FD_GSTAT: fsp = (struct fdc_status *)data; if ((fd->fdc->flags & FDC_STAT_VALID) == 0) return (EINVAL); memcpy(fsp->status, fd->fdc->status, 7 * sizeof(u_int)); return (0); case FD_GDTYPE: *(enum fd_drivetype *)data = fd->type; return (0); case FD_FORM: if (!(fflag & FWRITE)) return (EPERM); if (((struct fd_formb *)data)->format_version != FD_FORMAT_VERSION) return (EINVAL); /* wrong version of formatting prog */ error = fdmisccmd(fd, BIO_FMT, data); mtx_lock(&fd->fdc->fdc_mtx); fd->flags |= FD_NEWDISK; mtx_unlock(&fd->fdc->fdc_mtx); break; case FD_READID: rid = (struct fdc_readid *)data; if (rid->cyl > 85 || rid->head > 1) return (EINVAL); error = fdmisccmd(fd, BIO_RDID, data); break; case FIONBIO: case FIOASYNC: /* For backwards compat with old fd*(8) tools */ error = 0; break; default: if (debugflags & 0x80) printf("Unknown ioctl %lx\n", cmd); error = ENOIOCTL; break; } return (error); }; /* * Configuration/initialization stuff, per controller. */ devclass_t fdc_devclass; static devclass_t fd_devclass; struct fdc_ivars { int fdunit; int fdtype; }; void fdc_release_resources(struct fdc_data *fdc) { device_t dev; struct resource *last; int i; dev = fdc->fdc_dev; if (fdc->fdc_intr) bus_teardown_intr(dev, fdc->res_irq, fdc->fdc_intr); fdc->fdc_intr = NULL; if (fdc->res_irq != NULL) bus_release_resource(dev, SYS_RES_IRQ, fdc->rid_irq, fdc->res_irq); fdc->res_irq = NULL; last = NULL; for (i = 0; i < FDC_MAXREG; i++) { if (fdc->resio[i] != NULL && fdc->resio[i] != last) { bus_release_resource(dev, SYS_RES_IOPORT, fdc->ridio[i], fdc->resio[i]); last = fdc->resio[i]; fdc->resio[i] = NULL; } } if (fdc->res_drq != NULL) bus_release_resource(dev, SYS_RES_DRQ, fdc->rid_drq, fdc->res_drq); fdc->res_drq = NULL; } int fdc_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct fdc_ivars *ivars = device_get_ivars(child); switch (which) { case FDC_IVAR_FDUNIT: *result = ivars->fdunit; break; case FDC_IVAR_FDTYPE: *result = ivars->fdtype; break; default: return (ENOENT); } return (0); } int fdc_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { struct fdc_ivars *ivars = device_get_ivars(child); switch (which) { case FDC_IVAR_FDUNIT: ivars->fdunit = value; break; case FDC_IVAR_FDTYPE: ivars->fdtype = value; break; default: return (ENOENT); } return (0); } int fdc_initial_reset(device_t dev, struct fdc_data *fdc) { int ic_type, part_id; /* * A status value of 0xff is very unlikely, but not theoretically * impossible, but it is far more likely to indicate an empty bus. */ if (fdsts_rd(fdc) == 0xff) return (ENXIO); /* * Assert a reset to the floppy controller and check that the status * register goes to zero. */ fdout_wr(fdc, 0); fdout_wr(fdc, 0); if (fdsts_rd(fdc) != 0) return (ENXIO); /* * Clear the reset and see it come ready. */ fdout_wr(fdc, FDO_FRST); DELAY(100); if (fdsts_rd(fdc) != 0x80) return (ENXIO); /* Then, see if it can handle a command. 
*/ if (fdc_cmd(fdc, 3, NE7CMD_SPECIFY, NE7_SPEC_1(6, 240), NE7_SPEC_2(31, 0), 0)) return (ENXIO); /* * Try to identify the chip. * * The i8272 datasheet documents that unknown commands * will return ST0 as 0x80. The i8272 is supposedly identical * to the NEC765. * The i82077SL datasheet says 0x90 for the VERSION command, * and several "superio" chips emulate this. */ if (fdc_cmd(fdc, 1, NE7CMD_VERSION, 1, &ic_type)) return (ENXIO); if (fdc_cmd(fdc, 1, 0x18, 1, &part_id)) return (ENXIO); if (bootverbose) device_printf(dev, "ic_type %02x part_id %02x\n", ic_type, part_id); switch (ic_type & 0xff) { case 0x80: device_set_desc(dev, "NEC 765 or clone"); fdc->fdct = FDC_NE765; break; case 0x81: case 0x90: device_set_desc(dev, "Enhanced floppy controller"); fdc->fdct = FDC_ENHANCED; break; default: device_set_desc(dev, "Generic floppy controller"); fdc->fdct = FDC_UNKNOWN; break; } return (0); } int fdc_detach(device_t dev) { struct fdc_data *fdc; int error; fdc = device_get_softc(dev); /* have our children detached first */ if ((error = bus_generic_detach(dev))) return (error); if (fdc->fdc_intr) bus_teardown_intr(dev, fdc->res_irq, fdc->fdc_intr); fdc->fdc_intr = NULL; /* kill worker thread */ mtx_lock(&fdc->fdc_mtx); fdc->flags |= FDC_KTHREAD_EXIT; wakeup(&fdc->head); while ((fdc->flags & FDC_KTHREAD_ALIVE) != 0) msleep(fdc->fdc_thread, &fdc->fdc_mtx, PRIBIO, "fdcdet", 0); mtx_unlock(&fdc->fdc_mtx); /* reset controller, turn motor off */ fdout_wr(fdc, 0); if (!(fdc->flags & FDC_NODMA)) isa_dma_release(fdc->dmachan); fdc_release_resources(fdc); mtx_destroy(&fdc->fdc_mtx); return (0); } /* * Add a child device to the fdc controller. It will then be probed etc. */ device_t fdc_add_child(device_t dev, const char *name, int unit) { struct fdc_ivars *ivar; device_t child; ivar = malloc(sizeof *ivar, M_DEVBUF /* XXX */, M_NOWAIT | M_ZERO); if (ivar == NULL) return (NULL); child = device_add_child(dev, name, unit); if (child == NULL) { free(ivar, M_DEVBUF); return (NULL); } device_set_ivars(child, ivar); ivar->fdunit = unit; ivar->fdtype = FDT_NONE; if (resource_disabled(name, unit)) device_disable(child); return (child); } int fdc_attach(device_t dev) { struct fdc_data *fdc; int error; fdc = device_get_softc(dev); fdc->fdc_dev = dev; error = fdc_initial_reset(dev, fdc); if (error) { device_printf(dev, "does not respond\n"); return (error); } error = bus_setup_intr(dev, fdc->res_irq, INTR_TYPE_BIO | INTR_ENTROPY | ((fdc->flags & FDC_NOFAST) ? INTR_MPSAFE : 0), ((fdc->flags & FDC_NOFAST) ? NULL : fdc_intr_fast), ((fdc->flags & FDC_NOFAST) ? fdc_intr : NULL), fdc, &fdc->fdc_intr); if (error) { device_printf(dev, "cannot setup interrupt\n"); return (error); } if (!(fdc->flags & FDC_NODMA)) { error = isa_dma_acquire(fdc->dmachan); if (!error) { error = isa_dma_init(fdc->dmachan, MAX_BYTES_PER_CYL, M_WAITOK); if (error) isa_dma_release(fdc->dmachan); } if (error) return (error); } fdc->fdcu = device_get_unit(dev); fdc->flags |= FDC_NEEDS_RESET; mtx_init(&fdc->fdc_mtx, "fdc lock", NULL, MTX_DEF); /* reset controller, turn motor off, clear fdout mirror reg */ fdout_wr(fdc, fdc->fdout = 0); bioq_init(&fdc->head); settle = hz / 8; return (0); } void fdc_start_worker(device_t dev) { struct fdc_data *fdc; fdc = device_get_softc(dev); kproc_create(fdc_thread, fdc, &fdc->fdc_thread, 0, 0, "fdc%d", device_get_unit(dev)); } int fdc_hints_probe(device_t dev) { const char *name, *dname; int i, error, dunit; /* * Probe and attach any children. We should probably detect * devices from the BIOS unless overridden. 
*/ name = device_get_nameunit(dev); i = 0; while ((resource_find_match(&i, &dname, &dunit, "at", name)) == 0) { resource_int_value(dname, dunit, "drive", &dunit); fdc_add_child(dev, dname, dunit); } if ((error = bus_generic_attach(dev)) != 0) return (error); return (0); } int fdc_print_child(device_t me, device_t child) { int retval = 0, flags; retval += bus_print_child_header(me, child); retval += printf(" on %s drive %d", device_get_nameunit(me), fdc_get_fdunit(child)); if ((flags = device_get_flags(me)) != 0) retval += printf(" flags %#x", flags); retval += printf("\n"); return (retval); } /* * Configuration/initialization, per drive. */ static int fd_probe(device_t dev) { int unit; int i; u_int st0, st3; struct fd_data *fd; struct fdc_data *fdc; int fdsu; int flags, type; fdsu = fdc_get_fdunit(dev); fd = device_get_softc(dev); fdc = device_get_softc(device_get_parent(dev)); flags = device_get_flags(dev); fd->dev = dev; fd->fdc = fdc; fd->fdsu = fdsu; unit = device_get_unit(dev); /* Auto-probe if fdinfo is present, but always allow override. */ type = flags & FD_TYPEMASK; if (type == FDT_NONE && (type = fdc_get_fdtype(dev)) != FDT_NONE) { fd->type = type; goto done; } else { /* make sure fdautoselect() will be called */ fd->flags = FD_EMPTY; fd->type = type; } #if defined(__i386__) || defined(__amd64__) if (fd->type == FDT_NONE && (unit == 0 || unit == 1)) { /* Look up what the BIOS thinks we have. */ if (unit == 0) fd->type = (rtcin(RTC_FDISKETTE) & 0xf0) >> 4; else fd->type = rtcin(RTC_FDISKETTE) & 0x0f; if (fd->type == FDT_288M_1) fd->type = FDT_288M; } #endif /* __i386__ || __amd64__ */ /* is there a unit? */ if (fd->type == FDT_NONE) return (ENXIO); mtx_lock(&fdc->fdc_mtx); /* select it */ fd_select(fd); fd_motor(fd, 1); fdc->fd = fd; fdc_reset(fdc); /* XXX reset, then unreset, etc. */ DELAY(1000000); /* 1 sec */ if ((flags & FD_NO_PROBE) == 0) { /* If we're at track 0 first seek inwards. */ if ((fdc_sense_drive(fdc, &st3) == 0) && (st3 & NE7_ST3_T0)) { /* Seek some steps... */ if (fdc_cmd(fdc, 3, NE7CMD_SEEK, fdsu, 10, 0) == 0) { /* ...wait a moment... */ DELAY(300000); /* make ctrlr happy: */ fdc_sense_int(fdc, NULL, NULL); } } for (i = 0; i < 2; i++) { /* * we must recalibrate twice, just in case the * heads have been beyond cylinder 76, since * most FDCs still barf when attempting to * recalibrate more than 77 steps */ /* go back to 0: */ if (fdc_cmd(fdc, 2, NE7CMD_RECAL, fdsu, 0) == 0) { /* a second being enough for full stroke seek*/ DELAY(i == 0 ? 1000000 : 300000); /* anything responding? 
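 * A clear equipment-check bit in ST0 means the recalibrate reached
 * track 0, i.e. a drive answered; see the "no track 0" check below.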
*/ if (fdc_sense_int(fdc, &st0, NULL) == 0 && (st0 & NE7_ST0_EC) == 0) break; /* already probed successfully */ } } } fd_motor(fd, 0); fdc->fd = NULL; mtx_unlock(&fdc->fdc_mtx); if ((flags & FD_NO_PROBE) == 0 && (st0 & NE7_ST0_EC) != 0) /* no track 0 -> no drive present */ return (ENXIO); done: switch (fd->type) { case FDT_12M: device_set_desc(dev, "1200-KB 5.25\" drive"); break; case FDT_144M: device_set_desc(dev, "1440-KB 3.5\" drive"); break; case FDT_288M: device_set_desc(dev, "2880-KB 3.5\" drive (in 1440-KB mode)"); break; case FDT_360K: device_set_desc(dev, "360-KB 5.25\" drive"); break; case FDT_720K: device_set_desc(dev, "720-KB 3.5\" drive"); break; default: return (ENXIO); } fd->track = FD_NO_TRACK; fd->fdc = fdc; fd->fdsu = fdsu; fd->options = 0; callout_init_mtx(&fd->toffhandle, &fd->fdc->fdc_mtx, 0); /* initialize densities for subdevices */ fdsettype(fd, fd_native_types[fd->type]); return (0); } /* * We have to do this in a geom event because GEOM is not running * when fd_attach() is. * XXX: move fd_attach after geom like ata/scsi disks */ static void fd_attach2(void *arg, int flag) { struct fd_data *fd; fd = arg; fd->fd_geom = g_new_geomf(&g_fd_class, "fd%d", device_get_unit(fd->dev)); fd->fd_provider = g_new_providerf(fd->fd_geom, "%s", fd->fd_geom->name); fd->fd_geom->softc = fd; g_error_provider(fd->fd_provider, 0); } static int fd_attach(device_t dev) { struct fd_data *fd; fd = device_get_softc(dev); g_post_event(fd_attach2, fd, M_WAITOK, NULL); fd->flags |= FD_EMPTY; bioq_init(&fd->fd_bq); return (0); } static void fd_detach_geom(void *arg, int flag) { struct fd_data *fd = arg; g_topology_assert(); g_wither_geom(fd->fd_geom, ENXIO); } static int fd_detach(device_t dev) { struct fd_data *fd; fd = device_get_softc(dev); g_waitfor_event(fd_detach_geom, fd, M_WAITOK, NULL); while (device_get_state(dev) == DS_BUSY) tsleep(fd, PZERO, "fdd", hz/10); callout_drain(&fd->toffhandle); return (0); } static device_method_t fd_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fd_probe), DEVMETHOD(device_attach, fd_attach), DEVMETHOD(device_detach, fd_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX */ { 0, 0 } }; static driver_t fd_driver = { "fd", fd_methods, sizeof(struct fd_data) }; static int fdc_modevent(module_t mod, int type, void *data) { return (g_modevent(NULL, type, &g_fd_class)); } DRIVER_MODULE(fd, fdc, fd_driver, fd_devclass, fdc_modevent, 0); Index: head/sys/geom/cache/g_cache.c =================================================================== --- head/sys/geom/cache/g_cache.c (revision 350693) +++ head/sys/geom/cache/g_cache.c (revision 350694) @@ -1,1019 +1,1020 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 Ruslan Ermilov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(geom_cache, "GEOM cache module"); static MALLOC_DEFINE(M_GCACHE, "gcache_data", "GEOM_CACHE Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, cache, CTLFLAG_RW, 0, "GEOM_CACHE stuff"); static u_int g_cache_debug = 0; SYSCTL_UINT(_kern_geom_cache, OID_AUTO, debug, CTLFLAG_RW, &g_cache_debug, 0, "Debug level"); static u_int g_cache_enable = 1; SYSCTL_UINT(_kern_geom_cache, OID_AUTO, enable, CTLFLAG_RW, &g_cache_enable, 0, ""); static u_int g_cache_timeout = 10; SYSCTL_UINT(_kern_geom_cache, OID_AUTO, timeout, CTLFLAG_RW, &g_cache_timeout, 0, ""); static u_int g_cache_idletime = 5; SYSCTL_UINT(_kern_geom_cache, OID_AUTO, idletime, CTLFLAG_RW, &g_cache_idletime, 0, ""); static u_int g_cache_used_lo = 5; static u_int g_cache_used_hi = 20; static int sysctl_handle_pct(SYSCTL_HANDLER_ARGS) { u_int val = *(u_int *)arg1; int error; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return (error); if (val > 100) return (EINVAL); if ((arg1 == &g_cache_used_lo && val > g_cache_used_hi) || (arg1 == &g_cache_used_hi && g_cache_used_lo > val)) return (EINVAL); *(u_int *)arg1 = val; return (0); } SYSCTL_PROC(_kern_geom_cache, OID_AUTO, used_lo, CTLTYPE_UINT|CTLFLAG_RW, &g_cache_used_lo, 0, sysctl_handle_pct, "IU", ""); SYSCTL_PROC(_kern_geom_cache, OID_AUTO, used_hi, CTLTYPE_UINT|CTLFLAG_RW, &g_cache_used_hi, 0, sysctl_handle_pct, "IU", ""); static int g_cache_destroy(struct g_cache_softc *sc, boolean_t force); static g_ctl_destroy_geom_t g_cache_destroy_geom; static g_taste_t g_cache_taste; static g_ctl_req_t g_cache_config; static g_dumpconf_t g_cache_dumpconf; struct g_class g_cache_class = { .name = G_CACHE_CLASS_NAME, .version = G_VERSION, .ctlreq = g_cache_config, .taste = g_cache_taste, .destroy_geom = g_cache_destroy_geom }; #define OFF2BNO(off, sc) ((off) >> (sc)->sc_bshift) #define BNO2OFF(bno, sc) ((bno) << (sc)->sc_bshift) static struct g_cache_desc * g_cache_alloc(struct g_cache_softc *sc) { struct g_cache_desc *dp; mtx_assert(&sc->sc_mtx, MA_OWNED); if (!TAILQ_EMPTY(&sc->sc_usedlist)) { dp = TAILQ_FIRST(&sc->sc_usedlist); TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); sc->sc_nused--; dp->d_flags = 0; LIST_REMOVE(dp, d_next); return (dp); } if (sc->sc_nent > sc->sc_maxent) { sc->sc_cachefull++; return (NULL); } dp = malloc(sizeof(*dp), M_GCACHE, M_NOWAIT | M_ZERO); if (dp == NULL) return (NULL); dp->d_data = uma_zalloc(sc->sc_zone, M_NOWAIT); if (dp->d_data == NULL) { free(dp, M_GCACHE); return (NULL); } sc->sc_nent++; return (dp); } static void g_cache_free(struct g_cache_softc *sc, struct g_cache_desc *dp) { 
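The used_lo/used_hi knobs above are exposed through a custom SYSCTL_PROC handler so that writes are range-checked before they take effect. A minimal version of the same pattern is sketched below with a hypothetical kern.example.pct knob; the cross-check between the low and high watermarks done in sysctl_handle_pct() is omitted for brevity.

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static u_int example_pct = 50;

static int
sysctl_example_pct(SYSCTL_HANDLER_ARGS)
{
        u_int val = *(u_int *)arg1;
        int error;

        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);         /* error, or a read-only access */
        if (val > 100)
                return (EINVAL);        /* reject out-of-range writes */
        *(u_int *)arg1 = val;
        return (0);
}
static SYSCTL_NODE(_kern, OID_AUTO, example, CTLFLAG_RW, 0, "Example knobs");
SYSCTL_PROC(_kern_example, OID_AUTO, pct, CTLTYPE_UINT | CTLFLAG_RW,
    &example_pct, 0, sysctl_example_pct, "IU", "A percentage (0-100)");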
mtx_assert(&sc->sc_mtx, MA_OWNED); uma_zfree(sc->sc_zone, dp->d_data); free(dp, M_GCACHE); sc->sc_nent--; } static void g_cache_free_used(struct g_cache_softc *sc) { struct g_cache_desc *dp; u_int n; mtx_assert(&sc->sc_mtx, MA_OWNED); n = g_cache_used_lo * sc->sc_maxent / 100; while (sc->sc_nused > n) { KASSERT(!TAILQ_EMPTY(&sc->sc_usedlist), ("used list empty")); dp = TAILQ_FIRST(&sc->sc_usedlist); TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); sc->sc_nused--; LIST_REMOVE(dp, d_next); g_cache_free(sc, dp); } } static void g_cache_deliver(struct g_cache_softc *sc, struct bio *bp, struct g_cache_desc *dp, int error) { off_t off1, off, len; mtx_assert(&sc->sc_mtx, MA_OWNED); KASSERT(OFF2BNO(bp->bio_offset, sc) <= dp->d_bno, ("wrong entry")); KASSERT(OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc) >= dp->d_bno, ("wrong entry")); off1 = BNO2OFF(dp->d_bno, sc); off = MAX(bp->bio_offset, off1); len = MIN(bp->bio_offset + bp->bio_length, off1 + sc->sc_bsize) - off; if (bp->bio_error == 0) bp->bio_error = error; if (bp->bio_error == 0) { bcopy(dp->d_data + (off - off1), bp->bio_data + (off - bp->bio_offset), len); } bp->bio_completed += len; KASSERT(bp->bio_completed <= bp->bio_length, ("extra data")); if (bp->bio_completed == bp->bio_length) { if (bp->bio_error != 0) bp->bio_completed = 0; g_io_deliver(bp, bp->bio_error); } if (dp->d_flags & D_FLAG_USED) { TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); TAILQ_INSERT_TAIL(&sc->sc_usedlist, dp, d_used); } else if (OFF2BNO(off + len, sc) > dp->d_bno) { TAILQ_INSERT_TAIL(&sc->sc_usedlist, dp, d_used); sc->sc_nused++; dp->d_flags |= D_FLAG_USED; } dp->d_atime = time_uptime; } static void g_cache_done(struct bio *bp) { struct g_cache_softc *sc; struct g_cache_desc *dp; struct bio *bp2, *tmpbp; sc = bp->bio_from->geom->softc; KASSERT(G_CACHE_DESC1(bp) == sc, ("corrupt bio_caller in g_cache_done()")); dp = G_CACHE_DESC2(bp); mtx_lock(&sc->sc_mtx); bp2 = dp->d_biolist; while (bp2 != NULL) { KASSERT(G_CACHE_NEXT_BIO1(bp2) == sc, ("corrupt bio_driver in g_cache_done()")); tmpbp = G_CACHE_NEXT_BIO2(bp2); g_cache_deliver(sc, bp2, dp, bp->bio_error); bp2 = tmpbp; } dp->d_biolist = NULL; if (dp->d_flags & D_FLAG_INVALID) { sc->sc_invalid--; g_cache_free(sc, dp); } else if (bp->bio_error) { LIST_REMOVE(dp, d_next); if (dp->d_flags & D_FLAG_USED) { TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); sc->sc_nused--; } g_cache_free(sc, dp); } mtx_unlock(&sc->sc_mtx); g_destroy_bio(bp); } static struct g_cache_desc * g_cache_lookup(struct g_cache_softc *sc, off_t bno) { struct g_cache_desc *dp; mtx_assert(&sc->sc_mtx, MA_OWNED); LIST_FOREACH(dp, &sc->sc_desclist[G_CACHE_BUCKET(bno)], d_next) if (dp->d_bno == bno) return (dp); return (NULL); } static int g_cache_read(struct g_cache_softc *sc, struct bio *bp) { struct bio *cbp; struct g_cache_desc *dp; mtx_lock(&sc->sc_mtx); dp = g_cache_lookup(sc, OFF2BNO(bp->bio_offset + bp->bio_completed, sc)); if (dp != NULL) { /* Add to waiters list or deliver. */ sc->sc_cachehits++; if (dp->d_biolist != NULL) { G_CACHE_NEXT_BIO1(bp) = sc; G_CACHE_NEXT_BIO2(bp) = dp->d_biolist; dp->d_biolist = bp; } else g_cache_deliver(sc, bp, dp, 0); mtx_unlock(&sc->sc_mtx); return (0); } /* Cache miss. Allocate entry and schedule bio. 
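g_cache_deliver() above copies only the part of a cached block that overlaps the original request, using the OFF2BNO()/BNO2OFF() shift macros. The arithmetic is easiest to see in isolation; the userland check below is self-contained and uses made-up block size and offsets.

#include <stdio.h>
#include <stdint.h>

#define OFF2BNO(off, bshift)    ((off) >> (bshift))
#define BNO2OFF(bno, bshift)    ((bno) << (bshift))
#define MAX(a, b)               ((a) > (b) ? (a) : (b))
#define MIN(a, b)               ((a) < (b) ? (a) : (b))

int
main(void)
{
        int bshift = 16;                        /* assume 64 KB cache blocks */
        int64_t bsize = (int64_t)1 << bshift;
        int64_t req_off = 0x1fc00, req_len = 0x1000;    /* a 4 KB read */
        int64_t bno = OFF2BNO(req_off, bshift); /* block holding the start */
        int64_t off1 = BNO2OFF(bno, bshift);    /* that block's byte offset */
        /* Portion of the request satisfied by this one block: */
        int64_t off = MAX(req_off, off1);
        int64_t len = MIN(req_off + req_len, off1 + bsize) - off;

        printf("block %jd covers [%jd, %jd): copy %jd bytes "
            "starting at block offset %jd\n",
            (intmax_t)bno, (intmax_t)off1, (intmax_t)(off1 + bsize),
            (intmax_t)len, (intmax_t)(off - off1));
        return (0);
}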
*/ sc->sc_cachemisses++; dp = g_cache_alloc(sc); if (dp == NULL) { mtx_unlock(&sc->sc_mtx); return (ENOMEM); } cbp = g_clone_bio(bp); if (cbp == NULL) { g_cache_free(sc, dp); mtx_unlock(&sc->sc_mtx); return (ENOMEM); } dp->d_bno = OFF2BNO(bp->bio_offset + bp->bio_completed, sc); G_CACHE_NEXT_BIO1(bp) = sc; G_CACHE_NEXT_BIO2(bp) = NULL; dp->d_biolist = bp; LIST_INSERT_HEAD(&sc->sc_desclist[G_CACHE_BUCKET(dp->d_bno)], dp, d_next); mtx_unlock(&sc->sc_mtx); G_CACHE_DESC1(cbp) = sc; G_CACHE_DESC2(cbp) = dp; cbp->bio_done = g_cache_done; cbp->bio_offset = BNO2OFF(dp->d_bno, sc); cbp->bio_data = dp->d_data; cbp->bio_length = sc->sc_bsize; g_io_request(cbp, LIST_FIRST(&bp->bio_to->geom->consumer)); return (0); } static void g_cache_invalidate(struct g_cache_softc *sc, struct bio *bp) { struct g_cache_desc *dp; off_t bno, lim; mtx_lock(&sc->sc_mtx); bno = OFF2BNO(bp->bio_offset, sc); lim = OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc); do { if ((dp = g_cache_lookup(sc, bno)) != NULL) { LIST_REMOVE(dp, d_next); if (dp->d_flags & D_FLAG_USED) { TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); sc->sc_nused--; } if (dp->d_biolist == NULL) g_cache_free(sc, dp); else { dp->d_flags = D_FLAG_INVALID; sc->sc_invalid++; } } bno++; } while (bno <= lim); mtx_unlock(&sc->sc_mtx); } static void g_cache_start(struct bio *bp) { struct g_cache_softc *sc; struct g_geom *gp; struct g_cache_desc *dp; struct bio *cbp; gp = bp->bio_to->geom; sc = gp->softc; G_CACHE_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: sc->sc_reads++; sc->sc_readbytes += bp->bio_length; if (!g_cache_enable) break; if (bp->bio_offset + bp->bio_length > sc->sc_tail) break; if (OFF2BNO(bp->bio_offset, sc) == OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc)) { sc->sc_cachereads++; sc->sc_cachereadbytes += bp->bio_length; if (g_cache_read(sc, bp) == 0) return; sc->sc_cachereads--; sc->sc_cachereadbytes -= bp->bio_length; break; } else if (OFF2BNO(bp->bio_offset, sc) + 1 == OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc)) { mtx_lock(&sc->sc_mtx); dp = g_cache_lookup(sc, OFF2BNO(bp->bio_offset, sc)); if (dp == NULL || dp->d_biolist != NULL) { mtx_unlock(&sc->sc_mtx); break; } sc->sc_cachereads++; sc->sc_cachereadbytes += bp->bio_length; g_cache_deliver(sc, bp, dp, 0); mtx_unlock(&sc->sc_mtx); if (g_cache_read(sc, bp) == 0) return; sc->sc_cachereads--; sc->sc_cachereadbytes -= bp->bio_length; break; } break; case BIO_WRITE: sc->sc_writes++; sc->sc_wrotebytes += bp->bio_length; g_cache_invalidate(sc, bp); break; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; G_CACHE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, LIST_FIRST(&gp->consumer)); } static void g_cache_go(void *arg) { struct g_cache_softc *sc = arg; struct g_cache_desc *dp; int i; mtx_assert(&sc->sc_mtx, MA_OWNED); /* Forcibly mark idle ready entries as used. */ for (i = 0; i < G_CACHE_BUCKETS; i++) { LIST_FOREACH(dp, &sc->sc_desclist[i], d_next) { if (dp->d_flags & D_FLAG_USED || dp->d_biolist != NULL || time_uptime - dp->d_atime < g_cache_idletime) continue; TAILQ_INSERT_TAIL(&sc->sc_usedlist, dp, d_used); sc->sc_nused++; dp->d_flags |= D_FLAG_USED; } } /* Keep the number of used entries low. 
*/ if (sc->sc_nused > g_cache_used_hi * sc->sc_maxent / 100) g_cache_free_used(sc); callout_reset(&sc->sc_callout, g_cache_timeout * hz, g_cache_go, sc); } static int g_cache_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; int error; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); error = g_access(cp, dr, dw, de); return (error); } static void g_cache_orphan(struct g_consumer *cp) { g_topology_assert(); g_cache_destroy(cp->geom->softc, 1); } static struct g_cache_softc * g_cache_find_device(struct g_class *mp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp->softc); } return (NULL); } static struct g_geom * g_cache_create(struct g_class *mp, struct g_provider *pp, const struct g_cache_metadata *md, u_int type) { struct g_cache_softc *sc; struct g_geom *gp; struct g_provider *newpp; struct g_consumer *cp; u_int bshift; int i; g_topology_assert(); gp = NULL; newpp = NULL; cp = NULL; G_CACHE_DEBUG(1, "Creating device %s.", md->md_name); /* Cache size is minimum 100. */ if (md->md_size < 100) { G_CACHE_DEBUG(0, "Invalid size for device %s.", md->md_name); return (NULL); } /* Block size restrictions. */ bshift = ffs(md->md_bsize) - 1; if (md->md_bsize == 0 || md->md_bsize > MAXPHYS || md->md_bsize != 1 << bshift || (md->md_bsize % pp->sectorsize) != 0) { G_CACHE_DEBUG(0, "Invalid blocksize for provider %s.", pp->name); return (NULL); } /* Check for duplicate unit. */ if (g_cache_find_device(mp, (const char *)&md->md_name) != NULL) { G_CACHE_DEBUG(0, "Provider %s already exists.", md->md_name); return (NULL); } gp = g_new_geomf(mp, "%s", md->md_name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); sc->sc_type = type; sc->sc_bshift = bshift; sc->sc_bsize = 1 << bshift; sc->sc_zone = uma_zcreate("gcache", sc->sc_bsize, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&sc->sc_mtx, "GEOM CACHE mutex", NULL, MTX_DEF); for (i = 0; i < G_CACHE_BUCKETS; i++) LIST_INIT(&sc->sc_desclist[i]); TAILQ_INIT(&sc->sc_usedlist); sc->sc_maxent = md->md_size; callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0); gp->softc = sc; sc->sc_geom = gp; gp->start = g_cache_start; gp->orphan = g_cache_orphan; gp->access = g_cache_access; gp->dumpconf = g_cache_dumpconf; newpp = g_new_providerf(gp, "cache/%s", gp->name); newpp->sectorsize = pp->sectorsize; newpp->mediasize = pp->mediasize; if (type == G_CACHE_TYPE_AUTOMATIC) newpp->mediasize -= pp->sectorsize; sc->sc_tail = BNO2OFF(OFF2BNO(newpp->mediasize, sc), sc); cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { G_CACHE_DEBUG(0, "Cannot attach to provider %s.", pp->name); g_destroy_consumer(cp); g_destroy_provider(newpp); mtx_destroy(&sc->sc_mtx); g_free(sc); g_destroy_geom(gp); return (NULL); } g_error_provider(newpp, 0); G_CACHE_DEBUG(0, "Device %s created.", gp->name); callout_reset(&sc->sc_callout, g_cache_timeout * hz, g_cache_go, sc); return (gp); } static int g_cache_destroy(struct g_cache_softc *sc, boolean_t force) { struct g_geom *gp; struct g_provider *pp; struct g_cache_desc *dp, *dp2; int i; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_CACHE_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_CACHE_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else { G_CACHE_DEBUG(0, "Device %s removed.", 
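g_cache_go() above reschedules itself with callout_reset(9); the callout is bound to the softc mutex by callout_init_mtx(9) in g_cache_create(), and g_cache_destroy() waits for any running handler with callout_drain(9). A compact sketch of that periodic-housekeeping pattern follows; struct tick_softc and the function names are illustrative.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct tick_softc {
        struct mtx      mtx;
        struct callout  co;
        int             period;         /* seconds */
};

static void
tick_housekeeping(void *arg)
{
        struct tick_softc *sc = arg;

        mtx_assert(&sc->mtx, MA_OWNED); /* callout_init_mtx() => called locked */
        /* ... periodic work goes here ... */
        callout_reset(&sc->co, sc->period * hz, tick_housekeeping, sc);
}

static void
tick_start(struct tick_softc *sc)
{
        mtx_init(&sc->mtx, "tick mtx", NULL, MTX_DEF);
        callout_init_mtx(&sc->co, &sc->mtx, 0);
        sc->period = 10;
        mtx_lock(&sc->mtx);
        callout_reset(&sc->co, sc->period * hz, tick_housekeeping, sc);
        mtx_unlock(&sc->mtx);
}

static void
tick_stop(struct tick_softc *sc)
{
        /* Wait for a running handler to finish before destroying the lock. */
        callout_drain(&sc->co);
        mtx_destroy(&sc->mtx);
}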
gp->name); } callout_drain(&sc->sc_callout); mtx_lock(&sc->sc_mtx); for (i = 0; i < G_CACHE_BUCKETS; i++) { dp = LIST_FIRST(&sc->sc_desclist[i]); while (dp != NULL) { dp2 = LIST_NEXT(dp, d_next); g_cache_free(sc, dp); dp = dp2; } } mtx_unlock(&sc->sc_mtx); mtx_destroy(&sc->sc_mtx); uma_zdestroy(sc->sc_zone); g_free(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static int g_cache_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_cache_destroy(gp->softc, 0)); } static int g_cache_read_metadata(struct g_consumer *cp, struct g_cache_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ cache_metadata_decode(buf, md); g_free(buf); return (0); } static int g_cache_write_metadata(struct g_consumer *cp, struct g_cache_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 0, 1, 0); if (error != 0) return (error); pp = cp->provider; buf = malloc((size_t)pp->sectorsize, M_GCACHE, M_WAITOK | M_ZERO); cache_metadata_encode(md, buf); g_topology_unlock(); error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize); g_topology_lock(); g_access(cp, 0, -1, 0); free(buf, M_GCACHE); return (error); } static struct g_geom * g_cache_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_cache_metadata md; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); G_CACHE_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "cache:taste"); gp->start = g_cache_start; gp->orphan = g_cache_orphan; gp->access = g_cache_access; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_cache_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); if (strcmp(md.md_magic, G_CACHE_MAGIC) != 0) return (NULL); if (md.md_version > G_CACHE_VERSION) { printf("geom_cache.ko module is too old to handle %s.\n", pp->name); return (NULL); } if (md.md_provsize != pp->mediasize) return (NULL); gp = g_cache_create(mp, pp, &md, G_CACHE_TYPE_AUTOMATIC); if (gp == NULL) { G_CACHE_DEBUG(0, "Can't create %s.", md.md_name); return (NULL); } return (gp); } static void g_cache_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_cache_metadata md; struct g_provider *pp; struct g_geom *gp; intmax_t *bsize, *size; const char *name; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs != 2) { gctl_error(req, "Invalid number of arguments."); return; } strlcpy(md.md_magic, G_CACHE_MAGIC, sizeof(md.md_magic)); md.md_version = G_CACHE_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg0' argument"); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); size = gctl_get_paraml(req, "size", sizeof(*size)); if (size == NULL) { gctl_error(req, "No '%s' argument", "size"); return; } if ((u_int)*size < 100) { gctl_error(req, "Invalid '%s' argument", "size"); return; } md.md_size = (u_int)*size; bsize = gctl_get_paraml(req, "blocksize", sizeof(*bsize)); if (bsize == NULL) { gctl_error(req, "No 
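g_cache_read_metadata() above follows the usual GEOM metadata convention: the class metadata lives in the last sector of the underlying provider and is read with temporary read access, with the topology lock dropped around the actual I/O. The helper below restates that pattern in isolation; the caller is assumed to hold the topology lock and to g_free() the returned buffer after decoding it.

#include <sys/param.h>
#include <sys/systm.h>
#include <geom/geom.h>

static int
read_last_sector(struct g_consumer *cp, u_char **bufp)
{
        struct g_provider *pp;
        u_char *buf;
        int error;

        g_topology_assert();
        error = g_access(cp, 1, 0, 0);          /* need read access */
        if (error != 0)
                return (error);
        pp = cp->provider;
        g_topology_unlock();                    /* do not hold it across I/O */
        buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
            &error);
        g_topology_lock();
        g_access(cp, -1, 0, 0);
        if (buf == NULL)
                return (error);
        *bufp = buf;                            /* caller g_free()s this */
        return (0);
}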
'%s' argument", "blocksize"); return; } if (*bsize < 0) { gctl_error(req, "Invalid '%s' argument", "blocksize"); return; } md.md_bsize = (u_int)*bsize; /* This field is not important here. */ md.md_provsize = 0; name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg1' argument"); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_CACHE_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } gp = g_cache_create(mp, pp, &md, G_CACHE_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't create %s.", md.md_name); return; } } static void g_cache_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_cache_metadata md; struct g_cache_softc *sc; struct g_consumer *cp; intmax_t *bsize, *size; const char *name; int error, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Missing device."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg0' argument"); return; } sc = g_cache_find_device(mp, name); if (sc == NULL) { G_CACHE_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } size = gctl_get_paraml(req, "size", sizeof(*size)); if (size == NULL) { gctl_error(req, "No '%s' argument", "size"); return; } if ((u_int)*size != 0 && (u_int)*size < 100) { gctl_error(req, "Invalid '%s' argument", "size"); return; } if ((u_int)*size != 0) sc->sc_maxent = (u_int)*size; bsize = gctl_get_paraml(req, "blocksize", sizeof(*bsize)); if (bsize == NULL) { gctl_error(req, "No '%s' argument", "blocksize"); return; } if (*bsize < 0) { gctl_error(req, "Invalid '%s' argument", "blocksize"); return; } if (sc->sc_type != G_CACHE_TYPE_AUTOMATIC) return; strlcpy(md.md_name, name, sizeof(md.md_name)); strlcpy(md.md_magic, G_CACHE_MAGIC, sizeof(md.md_magic)); md.md_version = G_CACHE_VERSION; if ((u_int)*size != 0) md.md_size = (u_int)*size; else md.md_size = sc->sc_maxent; if ((u_int)*bsize != 0) md.md_bsize = (u_int)*bsize; else md.md_bsize = sc->sc_bsize; cp = LIST_FIRST(&sc->sc_geom->consumer); md.md_provsize = cp->provider->mediasize; error = g_cache_write_metadata(cp, &md); if (error == 0) G_CACHE_DEBUG(2, "Metadata on %s updated.", cp->provider->name); else G_CACHE_DEBUG(0, "Cannot update metadata on %s (error=%d).", cp->provider->name, error); } static void g_cache_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_cache_softc *sc; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } sc = g_cache_find_device(mp, name); if (sc == NULL) { G_CACHE_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } error = g_cache_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, 
error); return; } } } static void g_cache_ctl_reset(struct gctl_req *req, struct g_class *mp) { struct g_cache_softc *sc; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } sc = g_cache_find_device(mp, name); if (sc == NULL) { G_CACHE_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } sc->sc_reads = 0; sc->sc_readbytes = 0; sc->sc_cachereads = 0; sc->sc_cachereadbytes = 0; sc->sc_cachehits = 0; sc->sc_cachemisses = 0; sc->sc_cachefull = 0; sc->sc_writes = 0; sc->sc_wrotebytes = 0; } } static void g_cache_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_CACHE_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_cache_ctl_create(req, mp); return; } else if (strcmp(verb, "configure") == 0) { g_cache_ctl_configure(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_cache_ctl_destroy(req, mp); return; } else if (strcmp(verb, "reset") == 0) { g_cache_ctl_reset(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_cache_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_cache_softc *sc; if (pp != NULL || cp != NULL) return; sc = gp->softc; sbuf_printf(sb, "%s%u\n", indent, sc->sc_maxent); sbuf_printf(sb, "%s%u\n", indent, sc->sc_bsize); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_tail); sbuf_printf(sb, "%s%u\n", indent, sc->sc_nent); sbuf_printf(sb, "%s%u\n", indent, sc->sc_nused); sbuf_printf(sb, "%s%u\n", indent, sc->sc_invalid); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_reads); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_readbytes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachereads); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachereadbytes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachehits); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachemisses); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachefull); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_writes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_wrotebytes); } DECLARE_GEOM_CLASS(g_cache_class, g_cache); MODULE_VERSION(geom_cache, 0); Index: head/sys/geom/cache/g_cache.h =================================================================== --- head/sys/geom/cache/g_cache.h (revision 350693) +++ head/sys/geom/cache/g_cache.h (revision 350694) @@ -1,148 +1,133 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 Ruslan Ermilov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_CACHE_H_ #define _G_CACHE_H_ #include #define G_CACHE_CLASS_NAME "CACHE" #define G_CACHE_MAGIC "GEOM::CACHE" #define G_CACHE_VERSION 1 #ifdef _KERNEL #define G_CACHE_TYPE_MANUAL 0 #define G_CACHE_TYPE_AUTOMATIC 1 -#define G_CACHE_DEBUG(lvl, ...) do { \ - if (g_cache_debug >= (lvl)) { \ - printf("GEOM_CACHE"); \ - if (g_cache_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_CACHE_LOGREQ(bp, ...) do { \ - if (g_cache_debug >= 2) { \ - printf("GEOM_CACHE[2]: "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_CACHE_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_CACHE", g_cache_debug, (lvl), NULL, __VA_ARGS__) +#define G_CACHE_LOGREQ(bp, ...) \ + _GEOM_DEBUG("GEOM_CACHE", g_cache_debug, 2, (bp), __VA_ARGS__) #define G_CACHE_BUCKETS (1 << 3) #define G_CACHE_BUCKET(bno) ((bno) & (G_CACHE_BUCKETS - 1)) struct g_cache_softc { struct g_geom *sc_geom; int sc_type; u_int sc_bshift; u_int sc_bsize; off_t sc_tail; struct mtx sc_mtx; struct callout sc_callout; LIST_HEAD(, g_cache_desc) sc_desclist[G_CACHE_BUCKETS]; TAILQ_HEAD(, g_cache_desc) sc_usedlist; uma_zone_t sc_zone; u_int sc_maxent; /* max entries */ u_int sc_nent; /* allocated entries */ u_int sc_nused; /* re-useable entries */ u_int sc_invalid; /* invalid entries */ uintmax_t sc_reads; /* #reads */ uintmax_t sc_readbytes; /* bytes read */ uintmax_t sc_cachereads; /* #reads from cache */ uintmax_t sc_cachereadbytes; /* bytes read from cache */ uintmax_t sc_cachehits; /* cache hits */ uintmax_t sc_cachemisses; /* cache misses */ uintmax_t sc_cachefull; /* #times a cache was full */ uintmax_t sc_writes; /* #writes */ uintmax_t sc_wrotebytes; /* bytes written */ }; #define sc_name sc_geom->name struct g_cache_desc { off_t d_bno; /* block number */ caddr_t d_data; /* data area */ struct bio *d_biolist; /* waiters */ time_t d_atime; /* access time */ int d_flags; /* flags */ #define D_FLAG_USED (1 << 0) /* can be reused */ #define D_FLAG_INVALID (1 << 1) /* invalid */ LIST_ENTRY(g_cache_desc) d_next; /* list */ TAILQ_ENTRY(g_cache_desc) d_used; /* used list */ }; #define G_CACHE_NEXT_BIO1(bp) (bp)->bio_driver1 #define G_CACHE_NEXT_BIO2(bp) (bp)->bio_driver2 #define G_CACHE_DESC1(bp) (bp)->bio_caller1 #define G_CACHE_DESC2(bp) (bp)->bio_caller2 #endif /* _KERNEL */ struct g_cache_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ char md_name[16]; /* Cache value. 
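The header hunk above is the substance of this change for g_cache: the hand-rolled G_CACHE_DEBUG/G_CACHE_LOGREQ printf macros are removed and both become thin wrappers around a shared _GEOM_DEBUG() helper, with the helper picked up through the new include added near the top of each .c file (the same edit appears below for g_concat and g_eli). The helper's definition is not part of this hunk; a definition equivalent to the removed macros would plausibly look like the sketch below, but treat it as an assumption rather than the committed code.

#define _GEOM_DEBUG(classname, debugvar, lvl, bp, ...)  do {            \
        if ((debugvar) >= (lvl)) {                                      \
                printf("%s", (classname));                              \
                if ((debugvar) > 0)                                     \
                        printf("[%u]", (lvl));                          \
                printf(": ");                                           \
                printf(__VA_ARGS__);                                    \
                if ((bp) != NULL) {                                     \
                        printf(" ");                                    \
                        g_print_bio(bp);                                \
                }                                                       \
                printf("\n");                                           \
        }                                                               \
} while (0)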
*/ uint32_t md_bsize; /* Cache block size. */ uint32_t md_size; /* Cache size. */ uint64_t md_provsize; /* Provider's size. */ }; static __inline void cache_metadata_encode(const struct g_cache_metadata *md, u_char *data) { bcopy(md->md_magic, data, sizeof(md->md_magic)); le32enc(data + 16, md->md_version); bcopy(md->md_name, data + 20, sizeof(md->md_name)); le32enc(data + 36, md->md_bsize); le32enc(data + 40, md->md_size); le64enc(data + 44, md->md_provsize); } static __inline void cache_metadata_decode(const u_char *data, struct g_cache_metadata *md) { bcopy(data, md->md_magic, sizeof(md->md_magic)); md->md_version = le32dec(data + 16); bcopy(data + 20, md->md_name, sizeof(md->md_name)); md->md_bsize = le32dec(data + 36); md->md_size = le32dec(data + 40); md->md_provsize = le64dec(data + 44); } #endif /* _G_CACHE_H_ */ Index: head/sys/geom/concat/g_concat.c =================================================================== --- head/sys/geom/concat/g_concat.c (revision 350693) +++ head/sys/geom/concat/g_concat.c (revision 350694) @@ -1,1029 +1,1030 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(geom_concat, "GEOM concatenation support"); static MALLOC_DEFINE(M_CONCAT, "concat_data", "GEOM_CONCAT Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, concat, CTLFLAG_RW, 0, "GEOM_CONCAT stuff"); static u_int g_concat_debug = 0; SYSCTL_UINT(_kern_geom_concat, OID_AUTO, debug, CTLFLAG_RWTUN, &g_concat_debug, 0, "Debug level"); static int g_concat_destroy(struct g_concat_softc *sc, boolean_t force); static int g_concat_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_concat_taste; static g_ctl_req_t g_concat_config; static g_dumpconf_t g_concat_dumpconf; struct g_class g_concat_class = { .name = G_CONCAT_CLASS_NAME, .version = G_VERSION, .ctlreq = g_concat_config, .taste = g_concat_taste, .destroy_geom = g_concat_destroy_geom }; /* * Greatest Common Divisor. 
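cache_metadata_encode()/_decode() above serialize the metadata at fixed byte offsets with the le32enc()/le64enc() family, so the on-disk format is independent of host byte order. A runnable userland miniature of the same idea follows; struct tiny_md and its layout are invented for illustration.

#include <sys/endian.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct tiny_md {
        char            magic[16];
        uint32_t        version;
        uint64_t        provsize;
};

static void
tiny_md_encode(const struct tiny_md *md, unsigned char *data)
{
        memcpy(data, md->magic, sizeof(md->magic));     /* bytes 0..15 */
        le32enc(data + 16, md->version);                /* bytes 16..19 */
        le64enc(data + 20, md->provsize);               /* bytes 20..27 */
}

static void
tiny_md_decode(const unsigned char *data, struct tiny_md *md)
{
        memcpy(md->magic, data, sizeof(md->magic));
        md->version = le32dec(data + 16);
        md->provsize = le64dec(data + 20);
}

int
main(void)
{
        struct tiny_md in = { "GEOM::EXAMPLE", 1, 1048576 }, out;
        unsigned char sector[512] = { 0 };

        tiny_md_encode(&in, sector);
        tiny_md_decode(sector, &out);
        printf("%s v%u size %ju\n", out.magic, out.version,
            (uintmax_t)out.provsize);
        return (0);
}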
*/ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } /* * Return the number of valid disks. */ static u_int g_concat_nvalid(struct g_concat_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i].d_consumer != NULL) no++; } return (no); } static void g_concat_remove_disk(struct g_concat_disk *disk) { struct g_consumer *cp; struct g_concat_softc *sc; g_topology_assert(); KASSERT(disk->d_consumer != NULL, ("Non-valid disk in %s.", __func__)); sc = disk->d_softc; cp = disk->d_consumer; if (!disk->d_removed) { G_CONCAT_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); disk->d_removed = 1; } if (sc->sc_provider != NULL) { G_CONCAT_DEBUG(0, "Device %s deactivated.", sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) return; disk->d_consumer = NULL; g_detach(cp); g_destroy_consumer(cp); /* If there are no valid disks anymore, remove device. */ if (LIST_EMPTY(&sc->sc_geom->consumer)) g_concat_destroy(sc, 1); } static void g_concat_orphan(struct g_consumer *cp) { struct g_concat_softc *sc; struct g_concat_disk *disk; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; disk = cp->private; if (disk == NULL) /* Possible? */ return; g_concat_remove_disk(disk); } static int g_concat_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2, *tmp; struct g_concat_disk *disk; struct g_geom *gp; int error; g_topology_assert(); gp = pp->geom; /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { error = g_access(cp1, dr, dw, de); if (error != 0) goto fail; disk = cp1->private; if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && disk->d_removed) { g_concat_remove_disk(disk); /* May destroy geom. 
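The gcd()/lcm() helpers above exist so that g_concat_check_and_run() can publish a sector size every component can honor: the least common multiple of the components' sector sizes. A quick userland check of why lcm(), not max(), is the right operation:

#include <stdio.h>

static unsigned
gcd(unsigned a, unsigned b)
{
        while (b != 0) {
                unsigned c = a;
                a = b;
                b = c % b;
        }
        return (a);
}

static unsigned
lcm(unsigned a, unsigned b)
{
        return (a * b / gcd(a, b));
}

int
main(void)
{
        printf("%u\n", lcm(512, 4096));         /* 4096 */
        printf("%u\n", lcm(512, 2048));         /* 2048 */
        printf("%u\n", lcm(1024, 1536));        /* 3072: not simply max(a, b) */
        return (0);
}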
*/ } } return (0); fail: LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } return (error); } static void g_concat_candelete(struct bio *bp) { struct g_concat_softc *sc; struct g_concat_disk *disk; int i, val; sc = bp->bio_to->geom->softc; for (i = 0; i < sc->sc_ndisks; i++) { disk = &sc->sc_disks[i]; if (!disk->d_removed && disk->d_candelete) break; } val = i < sc->sc_ndisks; g_handleattr(bp, "GEOM::candelete", &val, sizeof(val)); } static void g_concat_kernel_dump(struct bio *bp) { struct g_concat_softc *sc; struct g_concat_disk *disk; struct bio *cbp; struct g_kerneldump *gkd; u_int i; sc = bp->bio_to->geom->softc; gkd = (struct g_kerneldump *)bp->bio_data; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i].d_start <= gkd->offset && sc->sc_disks[i].d_end > gkd->offset) break; } if (i == sc->sc_ndisks) { g_io_deliver(bp, EOPNOTSUPP); return; } disk = &sc->sc_disks[i]; gkd->offset -= disk->d_start; if (gkd->length > disk->d_end - disk->d_start - gkd->offset) gkd->length = disk->d_end - disk->d_start - gkd->offset; cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; g_io_request(cbp, disk->d_consumer); G_CONCAT_DEBUG(1, "Kernel dump will go to %s.", disk->d_consumer->provider->name); } static void g_concat_done(struct bio *bp) { struct g_concat_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; mtx_lock(&sc->sc_lock); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += bp->bio_completed; pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { mtx_unlock(&sc->sc_lock); g_io_deliver(pbp, pbp->bio_error); } else mtx_unlock(&sc->sc_lock); g_destroy_bio(bp); } static void g_concat_flush(struct g_concat_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_consumer *cp; struct bio *cbp; u_int no; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_concat_done; cbp->bio_caller1 = sc->sc_disks[no].d_consumer; cbp->bio_to = sc->sc_disks[no].d_consumer->provider; } while ((cbp = bioq_takefirst(&queue)) != NULL) { G_CONCAT_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_io_request(cbp, cp); } } static void g_concat_start(struct bio *bp) { struct bio_queue_head queue; struct g_concat_softc *sc; struct g_concat_disk *disk; struct g_provider *pp; off_t offset, end, length, off, len; struct bio *cbp; char *addr; u_int no; pp = bp->bio_to; sc = pp->geom->softc; /* * If sc == NULL, provider's error should be set and g_concat_start() * should not be called at all. */ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_CONCAT_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_FLUSH: g_concat_flush(sc, bp); return; case BIO_GETATTR: if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) { g_concat_kernel_dump(bp); return; } else if (strcmp("GEOM::candelete", bp->bio_attribute) == 0) { g_concat_candelete(bp); return; } /* To which provider it should be delivered? 
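g_concat_flush() above clones the flush once per component and g_concat_done() aggregates the completions: the first error wins, completed bytes add up, and the parent bio is delivered only when its last clone finishes. A sketch of just that aggregation step is shown below; in the driver these updates are serialized with sc->sc_lock, which is elided here and noted in the comments.

#include <sys/param.h>
#include <sys/bio.h>
#include <geom/geom.h>

static void
fanout_done(struct bio *bp)
{
        struct bio *pbp = bp->bio_parent;

        /*
         * NB: if clones can complete concurrently, these updates must be
         * serialized; g_concat_done() holds the softc mutex around them.
         */
        if (pbp->bio_error == 0)                /* first error wins */
                pbp->bio_error = bp->bio_error;
        pbp->bio_completed += bp->bio_completed;
        g_destroy_bio(bp);
        /* bio_children was bumped by g_clone_bio(); bio_inbed counts done. */
        if (++pbp->bio_inbed == pbp->bio_children)
                g_io_deliver(pbp, pbp->bio_error);
}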
*/ /* FALLTHROUGH */ default: g_io_deliver(bp, EOPNOTSUPP); return; } offset = bp->bio_offset; length = bp->bio_length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) addr = NULL; else addr = bp->bio_data; end = offset + length; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { disk = &sc->sc_disks[no]; if (disk->d_end <= offset) continue; if (disk->d_start >= end) break; off = offset - disk->d_start; len = MIN(length, disk->d_end - offset); length -= len; offset += len; cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); /* * Fill in the component buf structure. */ if (len == bp->bio_length) cbp->bio_done = g_std_done; else cbp->bio_done = g_concat_done; cbp->bio_offset = off; cbp->bio_length = len; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; addr += len; cbp->bio_to = disk->d_consumer->provider; cbp->bio_caller1 = disk; if (length == 0) break; } KASSERT(length == 0, ("Length is still greater than 0 (class=%s, name=%s).", bp->bio_to->geom->class->name, bp->bio_to->geom->name)); while ((cbp = bioq_takefirst(&queue)) != NULL) { G_CONCAT_LOGREQ(cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_io_request(cbp, disk->d_consumer); } } static void g_concat_check_and_run(struct g_concat_softc *sc) { struct g_concat_disk *disk; struct g_provider *dp, *pp; u_int no, sectorsize = 0; off_t start; int error; g_topology_assert(); if (g_concat_nvalid(sc) != sc->sc_ndisks) return; pp = g_new_providerf(sc->sc_geom, "concat/%s", sc->sc_name); pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE | G_PF_ACCEPT_UNMAPPED; start = 0; for (no = 0; no < sc->sc_ndisks; no++) { disk = &sc->sc_disks[no]; dp = disk->d_consumer->provider; disk->d_start = start; disk->d_end = disk->d_start + dp->mediasize; if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC) disk->d_end -= dp->sectorsize; start = disk->d_end; error = g_access(disk->d_consumer, 1, 0, 0); if (error == 0) { error = g_getattr("GEOM::candelete", disk->d_consumer, &disk->d_candelete); if (error != 0) disk->d_candelete = 0; (void)g_access(disk->d_consumer, -1, 0, 0); } else G_CONCAT_DEBUG(1, "Failed to access disk %s, error %d.", dp->name, error); if (no == 0) sectorsize = dp->sectorsize; else sectorsize = lcm(sectorsize, dp->sectorsize); /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_CONCAT_DEBUG(1, "Cancelling unmapped " "because of %s.", dp->name); pp->flags &= ~G_PF_ACCEPT_UNMAPPED; } } pp->sectorsize = sectorsize; /* We have sc->sc_disks[sc->sc_ndisks - 1].d_end in 'start'. 
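The main loop of g_concat_start() above carves one request into per-component pieces using each disk's [d_start, d_end) byte range. The interval arithmetic is pulled out below into a runnable userland sketch with made-up component sizes; the length > 0 loop condition plays the role of the driver's early break.

#include <stdio.h>
#include <stdint.h>

struct piece { int disk; int64_t off, len; };

static int
concat_split(const int64_t *d_start, const int64_t *d_end, int ndisks,
    int64_t offset, int64_t length, struct piece *out)
{
        int n = 0;

        for (int i = 0; i < ndisks && length > 0; i++) {
                if (d_end[i] <= offset)
                        continue;       /* request starts past this disk */
                int64_t off = offset - d_start[i];
                int64_t len = length < d_end[i] - offset ?
                    length : d_end[i] - offset;
                out[n++] = (struct piece){ i, off, len };
                offset += len;
                length -= len;
        }
        return (n);
}

int
main(void)
{
        /* Two 1 MB components; a 128 KB request that straddles the seam. */
        int64_t start[] = { 0, 1 << 20 }, end[] = { 1 << 20, 2 << 20 };
        struct piece p[2];
        int n = concat_split(start, end, 2, (1 << 20) - 65536, 131072, p);

        for (int i = 0; i < n; i++)
                printf("disk %d: offset %jd length %jd\n",
                    p[i].disk, (intmax_t)p[i].off, (intmax_t)p[i].len);
        return (0);
}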
*/ pp->mediasize = start; pp->stripesize = sc->sc_disks[0].d_consumer->provider->stripesize; pp->stripeoffset = sc->sc_disks[0].d_consumer->provider->stripeoffset; sc->sc_provider = pp; g_error_provider(pp, 0); G_CONCAT_DEBUG(0, "Device %s activated.", sc->sc_provider->name); } static int g_concat_read_metadata(struct g_consumer *cp, struct g_concat_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ concat_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no) { struct g_concat_disk *disk; struct g_consumer *cp, *fcp; struct g_geom *gp; int error; g_topology_assert(); /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); disk = &sc->sc_disks[no]; /* Check if disk is not already attached. */ if (disk->d_consumer != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC) { struct g_concat_metadata md; /* Re-read metadata. */ error = g_concat_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_CONCAT_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_CONCAT_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } } cp->private = disk; disk->d_consumer = cp; disk->d_softc = sc; disk->d_start = 0; /* not yet */ disk->d_end = 0; /* not yet */ disk->d_removed = 0; G_CONCAT_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_concat_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_concat_create(struct g_class *mp, const struct g_concat_metadata *md, u_int type) { struct g_concat_softc *sc; struct g_geom *gp; u_int no; G_CONCAT_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* One disks is minimum. 
*/ if (md->md_all < 1) return (NULL); /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_CONCAT_DEBUG(0, "Device %s already configured.", gp->name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_CONCAT, M_WAITOK | M_ZERO); gp->start = g_concat_start; gp->spoiled = g_concat_orphan; gp->orphan = g_concat_orphan; gp->access = g_concat_access; gp->dumpconf = g_concat_dumpconf; sc->sc_id = md->md_id; sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_concat_disk) * sc->sc_ndisks, M_CONCAT, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no].d_consumer = NULL; sc->sc_type = type; mtx_init(&sc->sc_lock, "gconcat lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_CONCAT_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_concat_destroy(struct g_concat_softc *sc, boolean_t force) { struct g_provider *pp; struct g_consumer *cp, *cp1; struct g_geom *gp; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_CONCAT_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_CONCAT_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } gp = sc->sc_geom; LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { g_concat_remove_disk(cp->private); if (cp1 == NULL) return (0); /* Recursion happened. */ } if (!LIST_EMPTY(&gp->consumer)) return (EINPROGRESS); gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", gp->name)); free(sc->sc_disks, M_CONCAT); mtx_destroy(&sc->sc_lock); free(sc, M_CONCAT); G_CONCAT_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_concat_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_concat_softc *sc; sc = gp->softc; return (g_concat_destroy(sc, 0)); } static struct g_geom * g_concat_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_concat_metadata md; struct g_concat_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_CONCAT_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "concat:taste"); gp->start = g_concat_start; gp->access = g_concat_access; gp->orphan = g_concat_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_concat_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_CONCAT_MAGIC) != 0) return (NULL); if (md.md_version > G_CONCAT_VERSION) { printf("geom_concat.ko module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provider field in earlier versions of metadata. */ if (md.md_version < 3) bzero(md.md_provider, sizeof(md.md_provider)); /* There was no md_provsize field in earlier versions of metadata. 
*/ if (md.md_version < 4) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. */ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_CONCAT_TYPE_AUTOMATIC) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_CONCAT_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_concat_add_disk(sc, pp, md.md_no); if (error != 0) { G_CONCAT_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_concat_create(mp, &md, G_CONCAT_TYPE_AUTOMATIC); if (gp == NULL) { G_CONCAT_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_CONCAT_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_concat_add_disk(sc, pp, md.md_no); if (error != 0) { G_CONCAT_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_concat_destroy(sc, 1); return (NULL); } } return (gp); } static void g_concat_ctl_create(struct gctl_req *req, struct g_class *mp) { u_int attached, no; struct g_concat_metadata md; struct g_provider *pp; struct g_concat_softc *sc; struct g_geom *gp; struct sbuf *sb; const char *name; char param[16]; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Too few arguments."); return; } strlcpy(md.md_magic, G_CONCAT_MAGIC, sizeof(md.md_magic)); md.md_version = G_CONCAT_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_id = arc4random(); md.md_no = 0; md.md_all = *nargs - 1; bzero(md.md_provider, sizeof(md.md_provider)); /* This field is not important here. 
*/ md.md_provsize = 0; /* Check all providers are valid */ for (no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_CONCAT_DEBUG(1, "Disk %s is invalid.", name); gctl_error(req, "Disk %s is invalid.", name); return; } } gp = g_concat_create(mp, &md, G_CONCAT_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't configure %s.", md.md_name); return; } sc = gp->softc; sb = sbuf_new_auto(); sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); for (attached = 0, no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); KASSERT(pp != NULL, ("Provider %s disappear?!", name)); if (g_concat_add_disk(sc, pp, no - 1) != 0) { G_CONCAT_DEBUG(1, "Disk %u (%s) not attached to %s.", no, pp->name, gp->name); sbuf_printf(sb, " %s", pp->name); continue; } attached++; } sbuf_finish(sb); if (md.md_all != attached) { g_concat_destroy(gp->softc, 1); gctl_error(req, "%s", sbuf_data(sb)); } sbuf_delete(sb); } static struct g_concat_softc * g_concat_find_device(struct g_class *mp, const char *name) { struct g_concat_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) == 0) return (sc); } return (NULL); } static void g_concat_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_concat_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_concat_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_concat_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_concat_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_CONCAT_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_concat_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_concat_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_concat_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_concat_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
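g_concat_ctl_create() above accumulates the names of components that failed to attach into a single sbuf(9) and only reports the message if something actually failed. The same pattern in miniature, with report_failures() and its arguments invented for illustration:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sbuf.h>

static void
report_failures(const char **names, int n, const int *failed)
{
        struct sbuf *sb;
        int i, nfail = 0;

        sb = sbuf_new_auto();                   /* auto-extending buffer */
        sbuf_printf(sb, "Can't attach disk(s):");
        for (i = 0; i < n; i++) {
                if (failed[i]) {
                        sbuf_printf(sb, " %s", names[i]);
                        nfail++;
                }
        }
        sbuf_finish(sb);                        /* terminate before sbuf_data() */
        if (nfail != 0)
                printf("%s\n", sbuf_data(sb));
        sbuf_delete(sb);
}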
*/ } else if (cp != NULL) { struct g_concat_disk *disk; disk = cp->private; if (disk == NULL) return; sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)disk->d_end); sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)disk->d_start); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_CONCAT_TYPE_AUTOMATIC: sbuf_cat(sb, "AUTOMATIC"); break; case G_CONCAT_TYPE_MANUAL: sbuf_cat(sb, "MANUAL"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_concat_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_cat(sb, "UP"); else sbuf_cat(sb, "DOWN"); sbuf_cat(sb, "\n"); } } DECLARE_GEOM_CLASS(g_concat_class, g_concat); MODULE_VERSION(geom_concat, 0); Index: head/sys/geom/concat/g_concat.h =================================================================== --- head/sys/geom/concat/g_concat.h (revision 350693) +++ head/sys/geom/concat/g_concat.h (revision 350694) @@ -1,130 +1,115 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_CONCAT_H_ #define _G_CONCAT_H_ #include #define G_CONCAT_CLASS_NAME "CONCAT" #define G_CONCAT_MAGIC "GEOM::CONCAT" /* * Version history: * 1 - Initial version number. * 2 - Added 'stop' command to gconcat(8). * 3 - Added md_provider field to metadata and '-h' option to gconcat(8). * 4 - Added md_provsize field to metadata. */ #define G_CONCAT_VERSION 4 #ifdef _KERNEL #define G_CONCAT_TYPE_MANUAL 0 #define G_CONCAT_TYPE_AUTOMATIC 1 -#define G_CONCAT_DEBUG(lvl, ...) do { \ - if (g_concat_debug >= (lvl)) { \ - printf("GEOM_CONCAT"); \ - if (g_concat_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_CONCAT_LOGREQ(bp, ...) do { \ - if (g_concat_debug >= 2) { \ - printf("GEOM_CONCAT[2]: "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_CONCAT_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_CONCAT", g_concat_debug, (lvl), NULL, __VA_ARGS__) +#define G_CONCAT_LOGREQ(bp, ...) 
\ + _GEOM_DEBUG("GEOM_CONCAT", g_concat_debug, 2, (bp), __VA_ARGS__) struct g_concat_disk { struct g_consumer *d_consumer; struct g_concat_softc *d_softc; off_t d_start; off_t d_end; int d_candelete; int d_removed; }; struct g_concat_softc { u_int sc_type; /* provider type */ struct g_geom *sc_geom; struct g_provider *sc_provider; uint32_t sc_id; /* concat unique ID */ struct g_concat_disk *sc_disks; uint16_t sc_ndisks; struct mtx sc_lock; }; #define sc_name sc_geom->name #endif /* _KERNEL */ struct g_concat_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ char md_name[16]; /* Concat name. */ uint32_t md_id; /* Unique ID. */ uint16_t md_no; /* Disk number. */ uint16_t md_all; /* Number of all disks. */ char md_provider[16]; /* Hardcoded provider. */ uint64_t md_provsize; /* Provider's size. */ }; static __inline void concat_metadata_encode(const struct g_concat_metadata *md, u_char *data) { bcopy(md->md_magic, data, sizeof(md->md_magic)); le32enc(data + 16, md->md_version); bcopy(md->md_name, data + 20, sizeof(md->md_name)); le32enc(data + 36, md->md_id); le16enc(data + 40, md->md_no); le16enc(data + 42, md->md_all); bcopy(md->md_provider, data + 44, sizeof(md->md_provider)); le64enc(data + 60, md->md_provsize); } static __inline void concat_metadata_decode(const u_char *data, struct g_concat_metadata *md) { bcopy(data, md->md_magic, sizeof(md->md_magic)); md->md_version = le32dec(data + 16); bcopy(data + 20, md->md_name, sizeof(md->md_name)); md->md_id = le32dec(data + 36); md->md_no = le16dec(data + 40); md->md_all = le16dec(data + 42); bcopy(data + 44, md->md_provider, sizeof(md->md_provider)); md->md_provsize = le64dec(data + 60); } #endif /* _G_CONCAT_H_ */ Index: head/sys/geom/eli/g_eli.c =================================================================== --- head/sys/geom/eli/g_eli.c (revision 350693) +++ head/sys/geom/eli/g_eli.c (revision 350694) @@ -1,1439 +1,1440 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2019 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include FEATURE(geom_eli, "GEOM crypto module"); MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW, 0, "GEOM_ELI stuff"); static int g_eli_version = G_ELI_VERSION; SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0, "GELI version"); int g_eli_debug = 0; SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0, "Debug level"); static u_int g_eli_tries = 3; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0, "Number of tries for entering the passphrase"); static u_int g_eli_visible_passphrase = GETS_NOECHO; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN, &g_eli_visible_passphrase, 0, "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)"); u_int g_eli_overwrites = G_ELI_OVERWRITES; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites, 0, "Number of times on-disk keys should be overwritten when destroying them"); static u_int g_eli_threads = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0, "Number of threads doing crypto work"); u_int g_eli_batch = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0, "Use crypto operations batching"); /* * Passphrase cached during boot, in order to be more user-friendly if * there are multiple providers using the same passphrase. */ static char cached_passphrase[256]; static u_int g_eli_boot_passcache = 1; TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache); SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD, &g_eli_boot_passcache, 0, "Passphrases are cached during boot process for possible reuse"); static void fetch_loader_passphrase(void * dummy) { char * env_passphrase; KASSERT(dynamic_kenv, ("need dynamic kenv")); if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) { /* Extract passphrase from the environment. */ strlcpy(cached_passphrase, env_passphrase, sizeof(cached_passphrase)); freeenv(env_passphrase); /* Wipe the passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); } } SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY, fetch_loader_passphrase, NULL); static void zero_boot_passcache(void) { explicit_bzero(cached_passphrase, sizeof(cached_passphrase)); } static void zero_geli_intake_keys(void) { struct keybuf *keybuf; int i; if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, clear all GELI keys. 
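 * The key buffer handed over by the loader is only accessed through the
 * fields used below, so the shape assumed here is roughly the following
 * (a sketch; the real declaration and the entry size live elsewhere in
 * the tree):
 *
 *	struct keybuf_ent {
 *		uint32_t	ke_type;	(KEYBUF_TYPE_GELI, _NONE, ...)
 *		char		ke_data[...];	(raw key material)
 *	};
 *	struct keybuf {
 *		uint32_t	kb_nents;
 *		struct keybuf_ent kb_ents[];
 *	};
 *
 * GELI entries are wiped and downgraded to KEYBUF_TYPE_NONE once root is
 * mounted, so the key material does not linger in memory.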
*/ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { explicit_bzero(keybuf->kb_ents[i].ke_data, sizeof(keybuf->kb_ents[i].ke_data)); keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE; } } } } static void zero_intake_passcache(void *dummy) { zero_boot_passcache(); zero_geli_intake_keys(); } EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0); static eventhandler_tag g_eli_pre_sync = NULL; static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, off_t offset, struct g_eli_metadata *md); static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_eli_init(struct g_class *mp); static void g_eli_fini(struct g_class *mp); static g_taste_t g_eli_taste; static g_dumpconf_t g_eli_dumpconf; struct g_class g_eli_class = { .name = G_ELI_CLASS_NAME, .version = G_VERSION, .ctlreq = g_eli_config, .taste = g_eli_taste, .destroy_geom = g_eli_destroy_geom, .init = g_eli_init, .fini = g_eli_fini }; /* * Code paths: * BIO_READ: * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ /* * EAGAIN from crypto(9) means, that we were probably balanced to another crypto * accelerator or something like this. * The function updates the SID and rerun the operation. */ int g_eli_crypto_rerun(struct cryptop *crp) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; bp = (struct bio *)crp->crp_opaque; sc = bp->bio_to->geom->softc; LIST_FOREACH(wr, &sc->sc_workers, w_next) { if (wr->w_number == bp->bio_pflags) break; } KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags)); G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %p -> %p).", bp->bio_cmd == BIO_READ ? "READ" : "WRITE", wr->w_sid, crp->crp_session); wr->w_sid = crp->crp_session; crp->crp_etype = 0; error = crypto_dispatch(crp); if (error == 0) return (0); G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error); crp->crp_etype = error; return (error); } static void g_eli_getattr_done(struct bio *bp) { if (bp->bio_error == 0 && !strcmp(bp->bio_attribute, "GEOM::physpath")) { strlcat(bp->bio_data, "/eli", bp->bio_length); } g_std_done(bp); } /* * The function is called afer reading encrypted data from the provider. * * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver */ void g_eli_read_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; sc = pbp->bio_to->geom->softc; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; if (pbp->bio_driver2 != NULL) { free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; } g_io_deliver(pbp, pbp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, pbp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } /* * The function is called after we encrypt and write data. 
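 * A single parent request may have been split into several child BIOs;
 * bio_inbed counts the children that have come back and the parent is
 * only completed, and its encrypted buffer (bio_driver2) freed, once
 * that count reaches bio_children.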
* * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver */ void g_eli_write_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; } else pbp->bio_completed = pbp->bio_length; /* * Write is finished, send it up. */ sc = pbp->bio_to->geom->softc; g_io_deliver(pbp, pbp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); } /* * This function should never be called, but GEOM made as it set ->orphan() * method for every geom. */ static void g_eli_orphan_spoil_assert(struct g_consumer *cp) { panic("Function %s() called for %s.", __func__, cp->geom->name); } static void g_eli_orphan(struct g_consumer *cp) { struct g_eli_softc *sc; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; g_eli_destroy(sc, TRUE); } static void g_eli_resize(struct g_consumer *cp) { struct g_eli_softc *sc; struct g_provider *epp, *pp; off_t oldsize; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; if ((sc->sc_flags & G_ELI_FLAG_AUTORESIZE) == 0) { G_ELI_DEBUG(0, "Autoresize is turned off, old size: %jd.", (intmax_t)sc->sc_provsize); return; } pp = cp->provider; if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) { struct g_eli_metadata md; u_char *sector; int error; sector = NULL; error = g_eli_read_metadata_offset(cp->geom->class, pp, sc->sc_provsize - pp->sectorsize, &md); if (error != 0) { G_ELI_DEBUG(0, "Cannot read metadata from %s (error=%d).", pp->name, error); goto iofail; } md.md_provsize = pp->mediasize; sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); eli_metadata_encode(&md, sector); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { G_ELI_DEBUG(0, "Cannot store metadata on %s (error=%d).", pp->name, error); goto iofail; } explicit_bzero(sector, pp->sectorsize); error = g_write_data(cp, sc->sc_provsize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { G_ELI_DEBUG(0, "Cannot clear old metadata from %s (error=%d).", pp->name, error); goto iofail; } iofail: explicit_bzero(&md, sizeof(md)); if (sector != NULL) { explicit_bzero(sector, pp->sectorsize); free(sector, M_ELI); } } oldsize = sc->sc_mediasize; sc->sc_mediasize = eli_mediasize(sc, pp->mediasize, pp->sectorsize); g_eli_key_resize(sc); sc->sc_provsize = pp->mediasize; epp = LIST_FIRST(&sc->sc_geom->provider); g_resize_provider(epp, sc->sc_mediasize); G_ELI_DEBUG(0, "Device %s size changed from %jd to %jd.", epp->name, (intmax_t)oldsize, (intmax_t)sc->sc_mediasize); } /* * BIO_READ: * G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ static void g_eli_start(struct bio *bp) { struct g_eli_softc *sc; struct g_consumer *cp; struct bio *cbp; sc = bp->bio_to->geom->softc; KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_ELI_LOGREQ(2, bp, "Request received."); switch 
(bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_GETATTR: case BIO_FLUSH: case BIO_ZONE: break; case BIO_DELETE: /* * If the user hasn't set the NODELETE flag, we just pass * it down the stack and let the layers beneath us do (or * not) whatever they do with it. If they have, we * reject it. A possible extension would be an * additional flag to take it as a hint to shred the data * with [multiple?] overwrites. */ if (!(sc->sc_flags & G_ELI_FLAG_NODELETE)) break; default: g_io_deliver(bp, EOPNOTSUPP); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } bp->bio_driver1 = cbp; bp->bio_pflags = G_ELI_NEW_BIO; switch (bp->bio_cmd) { case BIO_READ: if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) { g_eli_crypto_read(sc, bp, 0); break; } /* FALLTHROUGH */ case BIO_WRITE: mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); break; case BIO_GETATTR: case BIO_FLUSH: case BIO_DELETE: case BIO_ZONE: if (bp->bio_cmd == BIO_GETATTR) cbp->bio_done = g_eli_getattr_done; else cbp->bio_done = g_std_done; cp = LIST_FIRST(&sc->sc_geom->consumer); cbp->bio_to = cp->provider; G_ELI_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); break; } } static int g_eli_newsession(struct g_eli_worker *wr) { struct g_eli_softc *sc; struct cryptoini crie, cria; int error; sc = wr->w_softc; bzero(&crie, sizeof(crie)); crie.cri_alg = sc->sc_ealgo; crie.cri_klen = sc->sc_ekeylen; if (sc->sc_ealgo == CRYPTO_AES_XTS) crie.cri_klen <<= 1; if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { crie.cri_key = g_eli_key_hold(sc, 0, LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize); } else { crie.cri_key = sc->sc_ekey; } if (sc->sc_flags & G_ELI_FLAG_AUTH) { bzero(&cria, sizeof(cria)); cria.cri_alg = sc->sc_aalgo; cria.cri_klen = sc->sc_akeylen; cria.cri_key = sc->sc_akey; crie.cri_next = &cria; } switch (sc->sc_crypto) { case G_ELI_CRYPTO_SW: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_SOFTWARE); break; case G_ELI_CRYPTO_HW: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_HARDWARE); break; case G_ELI_CRYPTO_UNKNOWN: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_HARDWARE); if (error == 0) { mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = G_ELI_CRYPTO_HW; mtx_unlock(&sc->sc_queue_mtx); } else { error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_SOFTWARE); mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = G_ELI_CRYPTO_SW; mtx_unlock(&sc->sc_queue_mtx); } break; default: panic("%s: invalid condition", __func__); } if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) g_eli_key_drop(sc, crie.cri_key); return (error); } static void g_eli_freesession(struct g_eli_worker *wr) { crypto_freesession(wr->w_sid); } static void g_eli_cancel(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) { KASSERT(bp->bio_pflags == G_ELI_NEW_BIO, ("Not new bio when canceling (bp=%p).", bp)); g_io_deliver(bp, ENXIO); } } static struct bio * g_eli_takefirst(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) return (bioq_takefirst(&sc->sc_queue)); /* * Device suspended, so we skip new I/O requests. 
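 * BIOs that already entered the crypto path (bio_pflags != G_ELI_NEW_BIO)
 * must still be serviced so that sc_inflight can drain and the suspend
 * loop in g_eli_worker() can make progress; only brand new requests are
 * left sitting on the queue until the device is resumed.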
*/ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_pflags != G_ELI_NEW_BIO) break; } if (bp != NULL) bioq_remove(&sc->sc_queue, bp); return (bp); } /* * This is the main function for kernel worker thread when we don't have * hardware acceleration and we have to do cryptography in software. * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM * threads with crypto work. */ static void g_eli_worker(void *arg) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; wr = arg; sc = wr->w_softc; #ifdef EARLY_AP_STARTUP MPASS(!sc->sc_cpubind || smp_started); #elif defined(SMP) /* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */ if (sc->sc_cpubind) { while (!smp_started) tsleep(wr, 0, "geli:smp", hz / 4); } #endif thread_lock(curthread); sched_prio(curthread, PUSER); if (sc->sc_cpubind) sched_bind(curthread, wr->w_number % mp_ncpus); thread_unlock(curthread); G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm); for (;;) { mtx_lock(&sc->sc_queue_mtx); again: bp = g_eli_takefirst(sc); if (bp == NULL) { if (sc->sc_flags & G_ELI_FLAG_DESTROY) { g_eli_cancel(sc); LIST_REMOVE(wr, w_next); g_eli_freesession(wr); free(wr, M_ELI); G_ELI_DEBUG(1, "Thread %s exiting.", curthread->td_proc->p_comm); wakeup(&sc->sc_workers); mtx_unlock(&sc->sc_queue_mtx); kproc_exit(0); } while (sc->sc_flags & G_ELI_FLAG_SUSPEND) { if (sc->sc_inflight > 0) { G_ELI_DEBUG(0, "inflight=%d", sc->sc_inflight); /* * We still have inflight BIOs, so * sleep and retry. */ msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:inf", hz / 5); goto again; } /* * Suspend requested, mark the worker as * suspended and go to sleep. */ if (wr->w_active) { g_eli_freesession(wr); wr->w_active = FALSE; } wakeup(&sc->sc_workers); msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:suspend", 0); if (!wr->w_active && !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) { error = g_eli_newsession(wr); KASSERT(error == 0, ("g_eli_newsession() failed on resume (error=%d)", error)); wr->w_active = TRUE; } goto again; } msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0); continue; } if (bp->bio_pflags == G_ELI_NEW_BIO) atomic_add_int(&sc->sc_inflight, 1); mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_pflags == G_ELI_NEW_BIO) { bp->bio_pflags = 0; if (sc->sc_flags & G_ELI_FLAG_AUTH) { if (bp->bio_cmd == BIO_READ) g_eli_auth_read(sc, bp); else g_eli_auth_run(wr, bp); } else { if (bp->bio_cmd == BIO_READ) g_eli_crypto_read(sc, bp, 1); else g_eli_crypto_run(wr, bp); } } else { if (sc->sc_flags & G_ELI_FLAG_AUTH) g_eli_auth_run(wr, bp); else g_eli_crypto_run(wr, bp); } } } static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, off_t offset, struct g_eli_metadata *md) { struct g_geom *gp; struct g_consumer *cp; u_char *buf = NULL; int error; g_topology_assert(); gp = g_new_geomf(mp, "eli:taste"); gp->start = g_eli_start; gp->access = g_std_access; /* * g_eli_read_metadata() is always called from the event thread. * Our geom is created and destroyed in the same event, so there * could be no orphan nor spoil event in the meantime. */ gp->orphan = g_eli_orphan_spoil_assert; gp->spoiled = g_eli_orphan_spoil_assert; cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) goto end; error = g_access(cp, 1, 0, 0); if (error != 0) goto end; g_topology_unlock(); buf = g_read_data(cp, offset, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) goto end; error = eli_metadata_decode(buf, md); if (error != 0) goto end; /* Metadata was read and decoded successfully. 
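 * Success and failure converge on the label below: the read buffer is
 * freed, the access reference is dropped, and the temporary tasting
 * consumer and geom are destroyed either way.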
*/ end: if (buf != NULL) g_free(buf); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, 0, 0); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); return (error); } int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md) { return (g_eli_read_metadata_offset(mp, pp, pp->mediasize - pp->sectorsize, md)); } /* * The function is called when we had last close on provider and user requested * to close it when this situation occur. */ static void g_eli_last_close(void *arg, int flags __unused) { struct g_geom *gp; char gpname[64]; int error; g_topology_assert(); gp = arg; strlcpy(gpname, gp->name, sizeof(gpname)); error = g_eli_destroy(gp->softc, TRUE); KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).", gpname, error)); G_ELI_DEBUG(0, "Detached %s on last close.", gpname); } int g_eli_access(struct g_provider *pp, int dr, int dw, int de) { struct g_eli_softc *sc; struct g_geom *gp; gp = pp->geom; sc = gp->softc; if (dw > 0) { if (sc->sc_flags & G_ELI_FLAG_RO) { /* Deny write attempts. */ return (EROFS); } /* Someone is opening us for write, we need to remember that. */ sc->sc_flags |= G_ELI_FLAG_WOPEN; return (0); } /* Is this the last close? */ if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0) return (0); /* * Automatically detach on last close if requested. */ if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) || (sc->sc_flags & G_ELI_FLAG_WOPEN)) { g_post_event(g_eli_last_close, gp, M_WAITOK, NULL); } return (0); } static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif } struct g_geom * g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, const struct g_eli_metadata *md, const u_char *mkey, int nkey) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; u_int i, threads; int dcw, error; G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX); gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX); sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO); gp->start = g_eli_start; /* * Spoiling can happen even though we have the provider open * exclusively, e.g. through media change events. */ gp->spoiled = g_eli_orphan; gp->orphan = g_eli_orphan; gp->resize = g_eli_resize; gp->dumpconf = g_eli_dumpconf; /* * If detach-on-last-close feature is not enabled and we don't operate * on read-only provider, we can simply use g_std_access(). */ if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO)) gp->access = g_eli_access; else gp->access = g_std_access; eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize); sc->sc_nkey = nkey; gp->softc = sc; sc->sc_geom = gp; bioq_init(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF); mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF); pp = NULL; cp = g_new_consumer(gp); error = g_attach(cp, bpp); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot attach to %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).", bpp->name, error); } goto failed; } /* * Keep provider open all the time, so we can run critical tasks, * like Master Keys deletion, without wondering if we can open * provider or not. * We don't open provider for writing only when user requested read-only * access. */ dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 
0 : 1; error = g_access(cp, 1, dcw, 1); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot access %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot access %s (error=%d).", bpp->name, error); } goto failed; } /* * Remember the keys in our softc structure. */ g_eli_mkey_propagate(sc, mkey); LIST_INIT(&sc->sc_workers); threads = g_eli_threads; if (threads == 0) threads = mp_ncpus; sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus); for (i = 0; i < threads; i++) { if (g_eli_cpu_is_disabled(i)) { G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.", bpp->name, i); continue; } wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO); wr->w_softc = sc; wr->w_number = i; wr->w_active = TRUE; error = g_eli_newsession(wr); if (error != 0) { free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } goto failed; } error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0, "g_eli[%u] %s", i, bpp->name); if (error != 0) { g_eli_freesession(wr); free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } goto failed; } LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next); } /* * Create decrypted provider. */ pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX); pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; g_error_provider(pp, 0); G_ELI_DEBUG(0, "Device %s created.", pp->name); G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo), sc->sc_ekeylen); switch (sc->sc_ealgo) { case CRYPTO_3DES_CBC: gone_in(13, "support for GEOM_ELI volumes encrypted with 3des"); break; case CRYPTO_BLF_CBC: gone_in(13, "support for GEOM_ELI volumes encrypted with blowfish"); break; } if (sc->sc_flags & G_ELI_FLAG_AUTH) { G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo)); switch (sc->sc_aalgo) { case CRYPTO_MD5_HMAC: gone_in(13, "support for GEOM_ELI volumes authenticated with hmac/md5"); break; } } G_ELI_DEBUG(0, " Crypto: %s", sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware"); return (gp); failed: mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); /* * Wait for kernel threads self destruction. 
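 * Each worker notices G_ELI_FLAG_DESTROY on its next pass through
 * g_eli_worker(), frees its crypto session, removes itself from
 * sc_workers and wakes us before exiting, so the loop below only has to
 * sleep until the list drains.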
*/ while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, -dcw, -1); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); return (NULL); } int g_eli_destroy(struct g_eli_softc *sc, boolean_t force) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_ELI_DEBUG(1, "Device %s is still open, so it " "cannot be definitely removed.", pp->name); sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; g_wither_provider(pp, ENXIO); return (EBUSY); } else { G_ELI_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); gp->softc = NULL; g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) G_ELI_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom_close(gp, ENXIO); return (0); } static int g_eli_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_eli_softc *sc; sc = gp->softc; return (g_eli_destroy(sc, FALSE)); } static int g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider) { u_char *keyfile, *data; char *file, name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL && i == 0) { /* * If there is only one keyfile, allow simpler name. */ snprintf(name, sizeof(name), "%s:geli_keyfile", provider); keyfile = preload_search_by_type(name); } if (keyfile == NULL) return (i); /* Return number of loaded keyfiles. */ data = preload_fetch_addr(keyfile); if (data == NULL) { G_ELI_DEBUG(0, "Cannot find key file data for %s.", name); return (0); } size = preload_fetch_size(keyfile); if (size == 0) { G_ELI_DEBUG(0, "Cannot find key file size for %s.", name); return (0); } file = preload_search_info(keyfile, MODINFO_NAME); if (file == NULL) { G_ELI_DEBUG(0, "Cannot find key file name for %s.", name); return (0); } G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file, provider, name); g_eli_crypto_hmac_update(ctx, data, size); } } static void g_eli_keyfiles_clear(const char *provider) { u_char *keyfile, *data; char name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL) return; data = preload_fetch_addr(keyfile); size = preload_fetch_size(keyfile); if (data != NULL && size != 0) bzero(data, size); } } /* * Tasting is only made on boot. * We detect providers which should be attached before root is mounted. 
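 * Key material is expected to have been preloaded by the loader.  An
 * illustrative loader.conf(5) fragment for a provider named ada0p2 (the
 * name and paths are examples only; see geli(8) for the authoritative
 * syntax) could look like:
 *
 *	geli_ada0p2_keyfile0_load="YES"
 *	geli_ada0p2_keyfile0_type="ada0p2:geli_keyfile0"
 *	geli_ada0p2_keyfile0_name="/boot/keys/ada0p2.key"
 *	kern.geom.eli.passphrase="..."
 *
 * The type string matches the "provider:geli_keyfile<n>" names searched
 * for in g_eli_keyfiles_load(), and the passphrase tunable is consumed
 * and unset by fetch_loader_passphrase() above.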
*/ static struct g_geom * g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_eli_metadata md; struct g_geom *gp; struct hmac_ctx ctx; char passphrase[256]; u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN]; u_int i, nkey, nkeyfiles, tries, showpass; int error; struct keybuf *keybuf; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); if (root_mounted() || g_eli_tries == 0) return (NULL); G_ELI_DEBUG(3, "Tasting %s.", pp->name); error = g_eli_read_metadata(mp, pp, &md); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_ELI_MAGIC) != 0) return (NULL); if (md.md_version > G_ELI_VERSION) { printf("geom_eli.ko module is too old to handle %s.\n", pp->name); return (NULL); } if (md.md_provsize != pp->mediasize) return (NULL); /* Should we attach it on boot? */ if (!(md.md_flags & G_ELI_FLAG_BOOT)) return (NULL); if (md.md_keys == 0x00) { G_ELI_DEBUG(0, "No valid keys on %s.", pp->name); return (NULL); } if (md.md_iterations == -1) { /* If there is no passphrase, we try only once. */ tries = 1; } else { /* Ask for the passphrase no more than g_eli_tries times. */ tries = g_eli_tries; } if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, try all GELI keys. */ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { memcpy(key, keybuf->kb_ents[i].ke_data, sizeof(key)); if (g_eli_mkey_decrypt_any(&md, key, mkey, &nkey) == 0 ) { explicit_bzero(key, sizeof(key)); goto have_key; } } } } for (i = 0; i <= tries; i++) { g_eli_crypto_hmac_init(&ctx, NULL, 0); /* * Load all key files. */ nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name); if (nkeyfiles == 0 && md.md_iterations == -1) { /* * No key files and no passphrase, something is * definitely wrong here. * geli(8) doesn't allow for such situation, so assume * that there was really no passphrase and in that case * key files are no properly defined in loader.conf. */ G_ELI_DEBUG(0, "Found no key files in loader.conf for %s.", pp->name); return (NULL); } /* Ask for the passphrase if defined. */ if (md.md_iterations >= 0) { /* Try first with cached passphrase. */ if (i == 0) { if (!g_eli_boot_passcache) continue; memcpy(passphrase, cached_passphrase, sizeof(passphrase)); } else { printf("Enter passphrase for %s: ", pp->name); showpass = g_eli_visible_passphrase; if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0) showpass = GETS_ECHOPASS; cngets(passphrase, sizeof(passphrase), showpass); memcpy(cached_passphrase, passphrase, sizeof(passphrase)); } } /* * Prepare Derived-Key from the user passphrase. */ if (md.md_iterations == 0) { g_eli_crypto_hmac_update(&ctx, md.md_salt, sizeof(md.md_salt)); g_eli_crypto_hmac_update(&ctx, passphrase, strlen(passphrase)); explicit_bzero(passphrase, sizeof(passphrase)); } else if (md.md_iterations > 0) { u_char dkey[G_ELI_USERKEYLEN]; pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt, sizeof(md.md_salt), passphrase, md.md_iterations); bzero(passphrase, sizeof(passphrase)); g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey)); explicit_bzero(dkey, sizeof(dkey)); } g_eli_crypto_hmac_final(&ctx, key, 0); /* * Decrypt Master-Key. */ error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey); bzero(key, sizeof(key)); if (error == -1) { if (i == tries) { G_ELI_DEBUG(0, "Wrong key for %s. No tries left.", pp->name); g_eli_keyfiles_clear(pp->name); return (NULL); } if (i > 0) { G_ELI_DEBUG(0, "Wrong key for %s. Tries left: %u.", pp->name, tries - i); } /* Try again. 
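 * Pass 0 only consumed the passphrase cached by the loader (if any);
 * from pass 1 onwards the user is prompted on the console, so the
 * g_eli_tries budget effectively counts interactive attempts.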
*/ continue; } else if (error > 0) { G_ELI_DEBUG(0, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); g_eli_keyfiles_clear(pp->name); return (NULL); } g_eli_keyfiles_clear(pp->name); G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); break; } have_key: /* * We have correct key, let's attach provider. */ gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey); bzero(mkey, sizeof(mkey)); bzero(&md, sizeof(md)); if (gp == NULL) { G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name, G_ELI_SUFFIX); return (NULL); } return (gp); } static void g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_eli_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL || cp != NULL) return; /* Nothing here. */ sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_total); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_allocated); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_cat(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if (sc->sc_flags & (flag)) { \ if (!first) \ sbuf_cat(sb, ", "); \ else \ first = 0; \ sbuf_cat(sb, name); \ } \ } while (0) ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND"); ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY"); ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER"); ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME"); ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT"); ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH"); ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH"); ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH"); ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN"); ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY"); ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY"); ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE"); ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT"); ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS"); ADD_FLAG(G_ELI_FLAG_AUTORESIZE, "AUTORESIZE"); #undef ADD_FLAG } sbuf_cat(sb, "\n"); if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) { sbuf_printf(sb, "%s%u\n", indent, sc->sc_nkey); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_version); sbuf_printf(sb, "%s", indent); switch (sc->sc_crypto) { case G_ELI_CRYPTO_HW: sbuf_cat(sb, "hardware"); break; case G_ELI_CRYPTO_SW: sbuf_cat(sb, "software"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); if (sc->sc_flags & G_ELI_FLAG_AUTH) { sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_aalgo)); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_ekeylen); sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_ealgo)); sbuf_printf(sb, "%s%s\n", indent, (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE"); } static void g_eli_shutdown_pre_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp; struct g_eli_softc *sc; int error; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; if (sc == NULL) continue; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name)); if (pp->acr + pp->acw + pp->ace == 0) error = g_eli_destroy(sc, TRUE); else { sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; } } g_topology_unlock(); } static void g_eli_init(struct g_class *mp) { g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_eli_pre_sync == NULL) G_ELI_DEBUG(0, "Warning! 
Cannot register shutdown event."); } static void g_eli_fini(struct g_class *mp) { if (g_eli_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync); } DECLARE_GEOM_CLASS(g_eli_class, g_eli); MODULE_DEPEND(g_eli, crypto, 1, 1, 1); MODULE_VERSION(geom_eli, 0); Index: head/sys/geom/eli/g_eli.h =================================================================== --- head/sys/geom/eli/g_eli.h (revision 350693) +++ head/sys/geom/eli/g_eli.h (revision 350694) @@ -1,741 +1,723 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2019 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_ELI_H_ #define _G_ELI_H_ #include #include #include #include #include #include #ifdef _KERNEL #include #include #include #include #include #include #else #include #include #include #include #endif #include #include #ifndef _OpenSSL_ #include #endif #define G_ELI_CLASS_NAME "ELI" #define G_ELI_MAGIC "GEOM::ELI" #define G_ELI_SUFFIX ".eli" /* * Version history: * 0 - Initial version number. * 1 - Added data authentication support (md_aalgo field and * G_ELI_FLAG_AUTH flag). * 2 - Added G_ELI_FLAG_READONLY. * 3 - Added 'configure' subcommand. * 4 - IV is generated from offset converted to little-endian * (the G_ELI_FLAG_NATIVE_BYTE_ORDER flag will be set for older versions). * 5 - Added multiple encrypton keys and AES-XTS support. * 6 - Fixed usage of multiple keys for authenticated providers (the * G_ELI_FLAG_FIRST_KEY flag will be set for older versions). * 7 - Encryption keys are now generated from the Data Key and not from the * IV Key (the G_ELI_FLAG_ENC_IVKEY flag will be set for older versions). */ #define G_ELI_VERSION_00 0 #define G_ELI_VERSION_01 1 #define G_ELI_VERSION_02 2 #define G_ELI_VERSION_03 3 #define G_ELI_VERSION_04 4 #define G_ELI_VERSION_05 5 #define G_ELI_VERSION_06 6 #define G_ELI_VERSION_07 7 #define G_ELI_VERSION G_ELI_VERSION_07 /* ON DISK FLAGS. */ /* Use random, onetime keys. */ #define G_ELI_FLAG_ONETIME 0x00000001 /* Ask for the passphrase from the kernel, before mounting root. */ #define G_ELI_FLAG_BOOT 0x00000002 /* Detach on last close, if we were open for writing. */ #define G_ELI_FLAG_WO_DETACH 0x00000004 /* Detach on last close. 
*/ #define G_ELI_FLAG_RW_DETACH 0x00000008 /* Provide data authentication. */ #define G_ELI_FLAG_AUTH 0x00000010 /* Provider is read-only, we should deny all write attempts. */ #define G_ELI_FLAG_RO 0x00000020 /* Don't pass through BIO_DELETE requests. */ #define G_ELI_FLAG_NODELETE 0x00000040 /* This GELI supports GELIBoot */ #define G_ELI_FLAG_GELIBOOT 0x00000080 /* Hide passphrase length in GELIboot. */ #define G_ELI_FLAG_GELIDISPLAYPASS 0x00000100 /* Expand provider automatically. */ #define G_ELI_FLAG_AUTORESIZE 0x00000200 /* RUNTIME FLAGS. */ /* Provider was open for writing. */ #define G_ELI_FLAG_WOPEN 0x00010000 /* Destroy device. */ #define G_ELI_FLAG_DESTROY 0x00020000 /* Provider uses native byte-order for IV generation. */ #define G_ELI_FLAG_NATIVE_BYTE_ORDER 0x00040000 /* Provider uses single encryption key. */ #define G_ELI_FLAG_SINGLE_KEY 0x00080000 /* Device suspended. */ #define G_ELI_FLAG_SUSPEND 0x00100000 /* Provider uses first encryption key. */ #define G_ELI_FLAG_FIRST_KEY 0x00200000 /* Provider uses IV-Key for encryption key generation. */ #define G_ELI_FLAG_ENC_IVKEY 0x00400000 #define G_ELI_NEW_BIO 255 #define SHA512_MDLEN 64 #define G_ELI_AUTH_SECKEYLEN SHA256_DIGEST_LENGTH #define G_ELI_MAXMKEYS 2 #define G_ELI_MAXKEYLEN 64 #define G_ELI_USERKEYLEN G_ELI_MAXKEYLEN #define G_ELI_DATAKEYLEN G_ELI_MAXKEYLEN #define G_ELI_AUTHKEYLEN G_ELI_MAXKEYLEN #define G_ELI_IVKEYLEN G_ELI_MAXKEYLEN #define G_ELI_SALTLEN 64 #define G_ELI_DATAIVKEYLEN (G_ELI_DATAKEYLEN + G_ELI_IVKEYLEN) /* Data-Key, IV-Key, HMAC_SHA512(Derived-Key, Data-Key+IV-Key) */ #define G_ELI_MKEYLEN (G_ELI_DATAIVKEYLEN + SHA512_MDLEN) #define G_ELI_OVERWRITES 5 /* Switch data encryption key every 2^20 blocks. */ #define G_ELI_KEY_SHIFT 20 #define G_ELI_CRYPTO_UNKNOWN 0 #define G_ELI_CRYPTO_HW 1 #define G_ELI_CRYPTO_SW 2 #ifdef _KERNEL #if (MAX_KEY_BYTES < G_ELI_DATAIVKEYLEN) #error "MAX_KEY_BYTES is less than G_ELI_DATAKEYLEN" #endif extern int g_eli_debug; extern u_int g_eli_overwrites; extern u_int g_eli_batch; -#define G_ELI_DEBUG(lvl, ...) do { \ - if (g_eli_debug >= (lvl)) { \ - printf("GEOM_ELI"); \ - if (g_eli_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_ELI_LOGREQ(lvl, bp, ...) do { \ - if (g_eli_debug >= (lvl)) { \ - printf("GEOM_ELI"); \ - if (g_eli_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_ELI_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_ELI", g_eli_debug, (lvl), NULL, __VA_ARGS__) +#define G_ELI_LOGREQ(lvl, bp, ...) 
\ + _GEOM_DEBUG("GEOM_ELI", g_eli_debug, (lvl), (bp), __VA_ARGS__) struct g_eli_worker { struct g_eli_softc *w_softc; struct proc *w_proc; u_int w_number; crypto_session_t w_sid; boolean_t w_active; LIST_ENTRY(g_eli_worker) w_next; }; #endif /* _KERNEL */ struct g_eli_softc { struct g_geom *sc_geom; u_int sc_version; u_int sc_crypto; uint8_t sc_mkey[G_ELI_DATAIVKEYLEN]; uint8_t sc_ekey[G_ELI_DATAKEYLEN]; TAILQ_HEAD(, g_eli_key) sc_ekeys_queue; RB_HEAD(g_eli_key_tree, g_eli_key) sc_ekeys_tree; struct mtx sc_ekeys_lock; uint64_t sc_ekeys_total; uint64_t sc_ekeys_allocated; u_int sc_ealgo; u_int sc_ekeylen; uint8_t sc_akey[G_ELI_AUTHKEYLEN]; u_int sc_aalgo; u_int sc_akeylen; u_int sc_alen; SHA256_CTX sc_akeyctx; uint8_t sc_ivkey[G_ELI_IVKEYLEN]; SHA256_CTX sc_ivctx; int sc_nkey; uint32_t sc_flags; int sc_inflight; off_t sc_mediasize; size_t sc_sectorsize; off_t sc_provsize; u_int sc_bytes_per_sector; u_int sc_data_per_sector; #ifndef _KERNEL int sc_cpubind; #else /* _KERNEL */ boolean_t sc_cpubind; /* Only for software cryptography. */ struct bio_queue_head sc_queue; struct mtx sc_queue_mtx; LIST_HEAD(, g_eli_worker) sc_workers; #endif /* _KERNEL */ }; #define sc_name sc_geom->name #define G_ELI_KEY_MAGIC 0xe11341c struct g_eli_key { /* Key value, must be first in the structure. */ uint8_t gek_key[G_ELI_DATAKEYLEN]; /* Magic. */ int gek_magic; /* Key number. */ uint64_t gek_keyno; /* Reference counter. */ int gek_count; /* Keeps keys sorted by most recent use. */ TAILQ_ENTRY(g_eli_key) gek_next; /* Keeps keys sorted by number. */ RB_ENTRY(g_eli_key) gek_link; }; struct g_eli_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ uint32_t md_flags; /* Additional flags. */ uint16_t md_ealgo; /* Encryption algorithm. */ uint16_t md_keylen; /* Key length. */ uint16_t md_aalgo; /* Authentication algorithm. */ uint64_t md_provsize; /* Provider's size. */ uint32_t md_sectorsize; /* Sector size. */ uint8_t md_keys; /* Available keys. */ int32_t md_iterations; /* Number of iterations for PKCS#5v2. */ uint8_t md_salt[G_ELI_SALTLEN]; /* Salt. */ /* Encrypted master key (IV-key, Data-key, HMAC). */ uint8_t md_mkeys[G_ELI_MAXMKEYS * G_ELI_MKEYLEN]; u_char md_hash[16]; /* MD5 hash. 
*/ } __packed; #ifndef _OpenSSL_ static __inline void eli_metadata_encode_v0(struct g_eli_metadata *md, u_char **datap) { u_char *p; p = *datap; le32enc(p, md->md_flags); p += sizeof(md->md_flags); le16enc(p, md->md_ealgo); p += sizeof(md->md_ealgo); le16enc(p, md->md_keylen); p += sizeof(md->md_keylen); le64enc(p, md->md_provsize); p += sizeof(md->md_provsize); le32enc(p, md->md_sectorsize); p += sizeof(md->md_sectorsize); *p = md->md_keys; p += sizeof(md->md_keys); le32enc(p, md->md_iterations); p += sizeof(md->md_iterations); bcopy(md->md_salt, p, sizeof(md->md_salt)); p += sizeof(md->md_salt); bcopy(md->md_mkeys, p, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys); *datap = p; } static __inline void eli_metadata_encode_v1v2v3v4v5v6v7(struct g_eli_metadata *md, u_char **datap) { u_char *p; p = *datap; le32enc(p, md->md_flags); p += sizeof(md->md_flags); le16enc(p, md->md_ealgo); p += sizeof(md->md_ealgo); le16enc(p, md->md_keylen); p += sizeof(md->md_keylen); le16enc(p, md->md_aalgo); p += sizeof(md->md_aalgo); le64enc(p, md->md_provsize); p += sizeof(md->md_provsize); le32enc(p, md->md_sectorsize); p += sizeof(md->md_sectorsize); *p = md->md_keys; p += sizeof(md->md_keys); le32enc(p, md->md_iterations); p += sizeof(md->md_iterations); bcopy(md->md_salt, p, sizeof(md->md_salt)); p += sizeof(md->md_salt); bcopy(md->md_mkeys, p, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys); *datap = p; } static __inline void eli_metadata_encode(struct g_eli_metadata *md, u_char *data) { uint32_t hash[4]; MD5_CTX ctx; u_char *p; p = data; bcopy(md->md_magic, p, sizeof(md->md_magic)); p += sizeof(md->md_magic); le32enc(p, md->md_version); p += sizeof(md->md_version); switch (md->md_version) { case G_ELI_VERSION_00: eli_metadata_encode_v0(md, &p); break; case G_ELI_VERSION_01: case G_ELI_VERSION_02: case G_ELI_VERSION_03: case G_ELI_VERSION_04: case G_ELI_VERSION_05: case G_ELI_VERSION_06: case G_ELI_VERSION_07: eli_metadata_encode_v1v2v3v4v5v6v7(md, &p); break; default: #ifdef _KERNEL panic("%s: Unsupported version %u.", __func__, (u_int)md->md_version); #else assert(!"Unsupported metadata version."); #endif } MD5Init(&ctx); MD5Update(&ctx, data, p - data); MD5Final((void *)hash, &ctx); bcopy(hash, md->md_hash, sizeof(md->md_hash)); bcopy(md->md_hash, p, sizeof(md->md_hash)); } static __inline int eli_metadata_decode_v0(const u_char *data, struct g_eli_metadata *md) { uint32_t hash[4]; MD5_CTX ctx; const u_char *p; p = data + sizeof(md->md_magic) + sizeof(md->md_version); md->md_flags = le32dec(p); p += sizeof(md->md_flags); md->md_ealgo = le16dec(p); p += sizeof(md->md_ealgo); md->md_keylen = le16dec(p); p += sizeof(md->md_keylen); md->md_provsize = le64dec(p); p += sizeof(md->md_provsize); md->md_sectorsize = le32dec(p); p += sizeof(md->md_sectorsize); md->md_keys = *p; p += sizeof(md->md_keys); md->md_iterations = le32dec(p); p += sizeof(md->md_iterations); bcopy(p, md->md_salt, sizeof(md->md_salt)); p += sizeof(md->md_salt); bcopy(p, md->md_mkeys, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys); MD5Init(&ctx); MD5Update(&ctx, data, p - data); MD5Final((void *)hash, &ctx); bcopy(hash, md->md_hash, sizeof(md->md_hash)); if (bcmp(md->md_hash, p, 16) != 0) return (EINVAL); return (0); } static __inline int eli_metadata_decode_v1v2v3v4v5v6v7(const u_char *data, struct g_eli_metadata *md) { uint32_t hash[4]; MD5_CTX ctx; const u_char *p; p = data + sizeof(md->md_magic) + sizeof(md->md_version); md->md_flags = le32dec(p); p += sizeof(md->md_flags); md->md_ealgo = le16dec(p); p += sizeof(md->md_ealgo); 
md->md_keylen = le16dec(p); p += sizeof(md->md_keylen); md->md_aalgo = le16dec(p); p += sizeof(md->md_aalgo); md->md_provsize = le64dec(p); p += sizeof(md->md_provsize); md->md_sectorsize = le32dec(p); p += sizeof(md->md_sectorsize); md->md_keys = *p; p += sizeof(md->md_keys); md->md_iterations = le32dec(p); p += sizeof(md->md_iterations); bcopy(p, md->md_salt, sizeof(md->md_salt)); p += sizeof(md->md_salt); bcopy(p, md->md_mkeys, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys); MD5Init(&ctx); MD5Update(&ctx, data, p - data); MD5Final((void *)hash, &ctx); bcopy(hash, md->md_hash, sizeof(md->md_hash)); if (bcmp(md->md_hash, p, 16) != 0) return (EINVAL); return (0); } static __inline int eli_metadata_decode(const u_char *data, struct g_eli_metadata *md) { int error; bcopy(data, md->md_magic, sizeof(md->md_magic)); if (strcmp(md->md_magic, G_ELI_MAGIC) != 0) return (EINVAL); md->md_version = le32dec(data + sizeof(md->md_magic)); switch (md->md_version) { case G_ELI_VERSION_00: error = eli_metadata_decode_v0(data, md); break; case G_ELI_VERSION_01: case G_ELI_VERSION_02: case G_ELI_VERSION_03: case G_ELI_VERSION_04: case G_ELI_VERSION_05: case G_ELI_VERSION_06: case G_ELI_VERSION_07: error = eli_metadata_decode_v1v2v3v4v5v6v7(data, md); break; default: error = EOPNOTSUPP; break; } return (error); } #endif /* !_OpenSSL */ static __inline u_int g_eli_str2ealgo(const char *name) { if (strcasecmp("null", name) == 0) return (CRYPTO_NULL_CBC); else if (strcasecmp("null-cbc", name) == 0) return (CRYPTO_NULL_CBC); else if (strcasecmp("aes", name) == 0) return (CRYPTO_AES_XTS); else if (strcasecmp("aes-cbc", name) == 0) return (CRYPTO_AES_CBC); else if (strcasecmp("aes-xts", name) == 0) return (CRYPTO_AES_XTS); else if (strcasecmp("blowfish", name) == 0) return (CRYPTO_BLF_CBC); else if (strcasecmp("blowfish-cbc", name) == 0) return (CRYPTO_BLF_CBC); else if (strcasecmp("camellia", name) == 0) return (CRYPTO_CAMELLIA_CBC); else if (strcasecmp("camellia-cbc", name) == 0) return (CRYPTO_CAMELLIA_CBC); else if (strcasecmp("3des", name) == 0) return (CRYPTO_3DES_CBC); else if (strcasecmp("3des-cbc", name) == 0) return (CRYPTO_3DES_CBC); return (CRYPTO_ALGORITHM_MIN - 1); } static __inline u_int g_eli_str2aalgo(const char *name) { if (strcasecmp("hmac/md5", name) == 0) return (CRYPTO_MD5_HMAC); else if (strcasecmp("hmac/sha1", name) == 0) return (CRYPTO_SHA1_HMAC); else if (strcasecmp("hmac/ripemd160", name) == 0) return (CRYPTO_RIPEMD160_HMAC); else if (strcasecmp("hmac/sha256", name) == 0) return (CRYPTO_SHA2_256_HMAC); else if (strcasecmp("hmac/sha384", name) == 0) return (CRYPTO_SHA2_384_HMAC); else if (strcasecmp("hmac/sha512", name) == 0) return (CRYPTO_SHA2_512_HMAC); return (CRYPTO_ALGORITHM_MIN - 1); } static __inline const char * g_eli_algo2str(u_int algo) { switch (algo) { case CRYPTO_NULL_CBC: return ("NULL"); case CRYPTO_AES_CBC: return ("AES-CBC"); case CRYPTO_AES_XTS: return ("AES-XTS"); case CRYPTO_BLF_CBC: return ("Blowfish-CBC"); case CRYPTO_CAMELLIA_CBC: return ("CAMELLIA-CBC"); case CRYPTO_3DES_CBC: return ("3DES-CBC"); case CRYPTO_MD5_HMAC: return ("HMAC/MD5"); case CRYPTO_SHA1_HMAC: return ("HMAC/SHA1"); case CRYPTO_RIPEMD160_HMAC: return ("HMAC/RIPEMD160"); case CRYPTO_SHA2_256_HMAC: return ("HMAC/SHA256"); case CRYPTO_SHA2_384_HMAC: return ("HMAC/SHA384"); case CRYPTO_SHA2_512_HMAC: return ("HMAC/SHA512"); } return ("unknown"); } static __inline void eli_metadata_dump(const struct g_eli_metadata *md) { static const char hex[] = "0123456789abcdef"; char str[sizeof(md->md_mkeys) * 2 + 
1]; u_int i; printf(" magic: %s\n", md->md_magic); printf(" version: %u\n", (u_int)md->md_version); printf(" flags: 0x%x\n", (u_int)md->md_flags); printf(" ealgo: %s\n", g_eli_algo2str(md->md_ealgo)); printf(" keylen: %u\n", (u_int)md->md_keylen); if (md->md_flags & G_ELI_FLAG_AUTH) printf(" aalgo: %s\n", g_eli_algo2str(md->md_aalgo)); printf(" provsize: %ju\n", (uintmax_t)md->md_provsize); printf("sectorsize: %u\n", (u_int)md->md_sectorsize); printf(" keys: 0x%02x\n", (u_int)md->md_keys); printf("iterations: %d\n", (int)md->md_iterations); bzero(str, sizeof(str)); for (i = 0; i < sizeof(md->md_salt); i++) { str[i * 2] = hex[md->md_salt[i] >> 4]; str[i * 2 + 1] = hex[md->md_salt[i] & 0x0f]; } printf(" Salt: %s\n", str); bzero(str, sizeof(str)); for (i = 0; i < sizeof(md->md_mkeys); i++) { str[i * 2] = hex[md->md_mkeys[i] >> 4]; str[i * 2 + 1] = hex[md->md_mkeys[i] & 0x0f]; } printf("Master Key: %s\n", str); bzero(str, sizeof(str)); for (i = 0; i < 16; i++) { str[i * 2] = hex[md->md_hash[i] >> 4]; str[i * 2 + 1] = hex[md->md_hash[i] & 0x0f]; } printf(" MD5 hash: %s\n", str); } static __inline u_int g_eli_keylen(u_int algo, u_int keylen) { switch (algo) { case CRYPTO_NULL_CBC: if (keylen == 0) keylen = 64 * 8; else { if (keylen > 64 * 8) keylen = 0; } return (keylen); case CRYPTO_AES_CBC: case CRYPTO_CAMELLIA_CBC: switch (keylen) { case 0: return (128); case 128: case 192: case 256: return (keylen); default: return (0); } case CRYPTO_AES_XTS: switch (keylen) { case 0: return (128); case 128: case 256: return (keylen); default: return (0); } case CRYPTO_BLF_CBC: if (keylen == 0) return (128); if (keylen < 128 || keylen > 448) return (0); if ((keylen % 32) != 0) return (0); return (keylen); case CRYPTO_3DES_CBC: if (keylen == 0 || keylen == 192) return (192); return (0); default: return (0); } } static __inline u_int g_eli_hashlen(u_int algo) { switch (algo) { case CRYPTO_MD5_HMAC: return (16); case CRYPTO_SHA1_HMAC: return (20); case CRYPTO_RIPEMD160_HMAC: return (20); case CRYPTO_SHA2_256_HMAC: return (32); case CRYPTO_SHA2_384_HMAC: return (48); case CRYPTO_SHA2_512_HMAC: return (64); } return (0); } static __inline off_t eli_mediasize(const struct g_eli_softc *sc, off_t mediasize, u_int sectorsize) { if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) { mediasize -= sectorsize; } if ((sc->sc_flags & G_ELI_FLAG_AUTH) == 0) { mediasize -= (mediasize % sc->sc_sectorsize); } else { mediasize /= sc->sc_bytes_per_sector; mediasize *= sc->sc_sectorsize; } return (mediasize); } static __inline void eli_metadata_softc(struct g_eli_softc *sc, const struct g_eli_metadata *md, u_int sectorsize, off_t mediasize) { sc->sc_version = md->md_version; sc->sc_inflight = 0; sc->sc_crypto = G_ELI_CRYPTO_UNKNOWN; sc->sc_flags = md->md_flags; /* Backward compatibility. 
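 * Metadata written by older versions cannot express these properties
 * explicitly, so the corresponding runtime-only flags are derived from
 * md_version (see the version history near the top of this header).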
*/ if (md->md_version < G_ELI_VERSION_04) sc->sc_flags |= G_ELI_FLAG_NATIVE_BYTE_ORDER; if (md->md_version < G_ELI_VERSION_05) sc->sc_flags |= G_ELI_FLAG_SINGLE_KEY; if (md->md_version < G_ELI_VERSION_06 && (sc->sc_flags & G_ELI_FLAG_AUTH) != 0) { sc->sc_flags |= G_ELI_FLAG_FIRST_KEY; } if (md->md_version < G_ELI_VERSION_07) sc->sc_flags |= G_ELI_FLAG_ENC_IVKEY; sc->sc_ealgo = md->md_ealgo; if (sc->sc_flags & G_ELI_FLAG_AUTH) { sc->sc_akeylen = sizeof(sc->sc_akey) * 8; sc->sc_aalgo = md->md_aalgo; sc->sc_alen = g_eli_hashlen(sc->sc_aalgo); sc->sc_data_per_sector = sectorsize - sc->sc_alen; /* * Some hash functions (like SHA1 and RIPEMD160) generates hash * which length is not multiple of 128 bits, but we want data * length to be multiple of 128, so we can encrypt without * padding. The line below rounds down data length to multiple * of 128 bits. */ sc->sc_data_per_sector -= sc->sc_data_per_sector % 16; sc->sc_bytes_per_sector = (md->md_sectorsize - 1) / sc->sc_data_per_sector + 1; sc->sc_bytes_per_sector *= sectorsize; } sc->sc_provsize = mediasize; sc->sc_sectorsize = md->md_sectorsize; sc->sc_mediasize = eli_mediasize(sc, mediasize, sectorsize); sc->sc_ekeylen = md->md_keylen; } #ifdef _KERNEL int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md); struct g_geom *g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, const struct g_eli_metadata *md, const u_char *mkey, int nkey); int g_eli_destroy(struct g_eli_softc *sc, boolean_t force); int g_eli_access(struct g_provider *pp, int dr, int dw, int de); void g_eli_config(struct gctl_req *req, struct g_class *mp, const char *verb); void g_eli_read_done(struct bio *bp); void g_eli_write_done(struct bio *bp); int g_eli_crypto_rerun(struct cryptop *crp); void g_eli_crypto_read(struct g_eli_softc *sc, struct bio *bp, boolean_t fromworker); void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp); void g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp); void g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp); #endif void g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv, size_t size); void g_eli_mkey_hmac(unsigned char *mkey, const unsigned char *key); int g_eli_mkey_decrypt(const struct g_eli_metadata *md, const unsigned char *key, unsigned char *mkey, unsigned keyp); int g_eli_mkey_decrypt_any(const struct g_eli_metadata *md, const unsigned char *key, unsigned char *mkey, unsigned *nkeyp); int g_eli_mkey_encrypt(unsigned algo, const unsigned char *key, unsigned keylen, unsigned char *mkey); #ifdef _KERNEL void g_eli_mkey_propagate(struct g_eli_softc *sc, const unsigned char *mkey); #endif int g_eli_crypto_encrypt(u_int algo, u_char *data, size_t datasize, const u_char *key, size_t keysize); int g_eli_crypto_decrypt(u_int algo, u_char *data, size_t datasize, const u_char *key, size_t keysize); struct hmac_ctx { SHA512_CTX innerctx; SHA512_CTX outerctx; }; void g_eli_crypto_hmac_init(struct hmac_ctx *ctx, const char *hkey, size_t hkeylen); void g_eli_crypto_hmac_update(struct hmac_ctx *ctx, const uint8_t *data, size_t datasize); void g_eli_crypto_hmac_final(struct hmac_ctx *ctx, uint8_t *md, size_t mdsize); void g_eli_crypto_hmac(const char *hkey, size_t hkeysize, const uint8_t *data, size_t datasize, uint8_t *md, size_t mdsize); void g_eli_key_fill(struct g_eli_softc *sc, struct g_eli_key *key, uint64_t keyno); #ifdef _KERNEL void g_eli_key_init(struct g_eli_softc *sc); void g_eli_key_destroy(struct g_eli_softc *sc); void g_eli_key_resize(struct 
g_eli_softc *sc); uint8_t *g_eli_key_hold(struct g_eli_softc *sc, off_t offset, size_t blocksize); void g_eli_key_drop(struct g_eli_softc *sc, uint8_t *rawkey); #endif #endif /* !_G_ELI_H_ */ Index: head/sys/geom/eli/g_eli_ctl.c =================================================================== --- head/sys/geom/eli/g_eli_ctl.c (revision 350693) +++ head/sys/geom/eli/g_eli_ctl.c (revision 350694) @@ -1,1229 +1,1230 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2011 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include MALLOC_DECLARE(M_ELI); static void g_eli_ctl_attach(struct gctl_req *req, struct g_class *mp) { struct g_eli_metadata md; struct g_provider *pp; const char *name; u_char *key, mkey[G_ELI_DATAIVKEYLEN]; int *nargs, *detach, *readonly, *dryrunp; int keysize, error, nkey, dryrun, dummy; intmax_t *valp; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Invalid number of arguments."); return; } detach = gctl_get_paraml(req, "detach", sizeof(*detach)); if (detach == NULL) { gctl_error(req, "No '%s' argument.", "detach"); return; } /* "keyno" is optional for backward compatibility */ nkey = -1; valp = gctl_get_param(req, "keyno", &dummy); if (valp != NULL) { valp = gctl_get_paraml(req, "keyno", sizeof(*valp)); if (valp != NULL) nkey = *valp; } if (nkey < -1 || nkey >= G_ELI_MAXMKEYS) { gctl_error(req, "Invalid '%s' argument.", "keyno"); return; } readonly = gctl_get_paraml(req, "readonly", sizeof(*readonly)); if (readonly == NULL) { gctl_error(req, "No '%s' argument.", "readonly"); return; } /* "dryrun" is optional for backward compatibility */ dryrun = 0; dryrunp = gctl_get_param(req, "dryrun", &dummy); if (dryrunp != NULL) { dryrunp = gctl_get_paraml(req, "dryrun", sizeof(*dryrunp)); if (dryrunp != NULL) dryrun = *dryrunp; } if (*detach && *readonly) { gctl_error(req, "Options -d and -r are mutually exclusive."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", name, error); return; } if (md.md_keys == 0x00) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "No valid keys on %s.", pp->name); return; } key = gctl_get_param(req, "key", &keysize); if (key == NULL || keysize != G_ELI_USERKEYLEN) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "No '%s' argument.", "key"); return; } if (nkey == -1) error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey); else error = g_eli_mkey_decrypt(&md, key, mkey, nkey); explicit_bzero(key, keysize); if (error == -1) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Wrong key for %s.", pp->name); return; } else if (error > 0) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); return; } G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); if (*detach) md.md_flags |= G_ELI_FLAG_WO_DETACH; if (*readonly) md.md_flags |= G_ELI_FLAG_RO; if (!dryrun) g_eli_create(req, mp, pp, &md, mkey, nkey); explicit_bzero(mkey, sizeof(mkey)); explicit_bzero(&md, sizeof(md)); } static struct g_eli_softc * g_eli_find_device(struct g_class *mp, const char *prov) { struct g_eli_softc *sc; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; if (strncmp(prov, "/dev/", strlen("/dev/")) == 0) prov += strlen("/dev/"); LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; pp = LIST_FIRST(&gp->provider); if (pp != NULL && strcmp(pp->name, prov) == 0) return (sc); cp = 
LIST_FIRST(&gp->consumer); if (cp != NULL && cp->provider != NULL && strcmp(cp->provider->name, prov) == 0) { return (sc); } } return (NULL); } static void g_eli_ctl_detach(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; int *force, *last, *nargs, error; const char *prov; char param[16]; int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } last = gctl_get_paraml(req, "last", sizeof(*last)); if (last == NULL) { gctl_error(req, "No '%s' argument.", "last"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); prov = gctl_get_asciiparam(req, param); if (prov == NULL) { gctl_error(req, "No 'arg%d' argument.", i); return; } sc = g_eli_find_device(mp, prov); if (sc == NULL) { gctl_error(req, "No such device: %s.", prov); return; } if (*last) { sc->sc_flags |= G_ELI_FLAG_RW_DETACH; sc->sc_geom->access = g_eli_access; } else { error = g_eli_destroy(sc, *force ? TRUE : FALSE); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } } static void g_eli_ctl_onetime(struct gctl_req *req, struct g_class *mp) { struct g_eli_metadata md; struct g_provider *pp; const char *name; intmax_t *keylen, *sectorsize; u_char mkey[G_ELI_DATAIVKEYLEN]; int *nargs, *detach, *noautoresize, *notrim; g_topology_assert(); bzero(&md, sizeof(md)); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Invalid number of arguments."); return; } strlcpy(md.md_magic, G_ELI_MAGIC, sizeof(md.md_magic)); md.md_version = G_ELI_VERSION; md.md_flags |= G_ELI_FLAG_ONETIME; md.md_flags |= G_ELI_FLAG_AUTORESIZE; detach = gctl_get_paraml(req, "detach", sizeof(*detach)); if (detach != NULL && *detach) md.md_flags |= G_ELI_FLAG_WO_DETACH; noautoresize = gctl_get_paraml(req, "noautoresize", sizeof(*noautoresize)); if (noautoresize != NULL && *noautoresize) md.md_flags &= ~G_ELI_FLAG_AUTORESIZE; notrim = gctl_get_paraml(req, "notrim", sizeof(*notrim)); if (notrim != NULL && *notrim) md.md_flags |= G_ELI_FLAG_NODELETE; md.md_ealgo = CRYPTO_ALGORITHM_MIN - 1; name = gctl_get_asciiparam(req, "aalgo"); if (name == NULL) { gctl_error(req, "No '%s' argument.", "aalgo"); return; } if (*name != '\0') { md.md_aalgo = g_eli_str2aalgo(name); if (md.md_aalgo >= CRYPTO_ALGORITHM_MIN && md.md_aalgo <= CRYPTO_ALGORITHM_MAX) { md.md_flags |= G_ELI_FLAG_AUTH; } else { /* * For backward compatibility, check if the -a option * was used to provide encryption algorithm. 
*/ md.md_ealgo = g_eli_str2ealgo(name); if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || md.md_ealgo > CRYPTO_ALGORITHM_MAX) { gctl_error(req, "Invalid authentication algorithm."); return; } else { gctl_error(req, "warning: The -e option, not " "the -a option is now used to specify " "encryption algorithm to use."); } } } if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || md.md_ealgo > CRYPTO_ALGORITHM_MAX) { name = gctl_get_asciiparam(req, "ealgo"); if (name == NULL) { gctl_error(req, "No '%s' argument.", "ealgo"); return; } md.md_ealgo = g_eli_str2ealgo(name); if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || md.md_ealgo > CRYPTO_ALGORITHM_MAX) { gctl_error(req, "Invalid encryption algorithm."); return; } } keylen = gctl_get_paraml(req, "keylen", sizeof(*keylen)); if (keylen == NULL) { gctl_error(req, "No '%s' argument.", "keylen"); return; } md.md_keylen = g_eli_keylen(md.md_ealgo, *keylen); if (md.md_keylen == 0) { gctl_error(req, "Invalid '%s' argument.", "keylen"); return; } /* Not important here. */ md.md_provsize = 0; /* Not important here. */ bzero(md.md_salt, sizeof(md.md_salt)); md.md_keys = 0x01; arc4rand(mkey, sizeof(mkey), 0); /* Not important here. */ bzero(md.md_hash, sizeof(md.md_hash)); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } sectorsize = gctl_get_paraml(req, "sectorsize", sizeof(*sectorsize)); if (sectorsize == NULL) { gctl_error(req, "No '%s' argument.", "sectorsize"); return; } if (*sectorsize == 0) md.md_sectorsize = pp->sectorsize; else { if (*sectorsize < 0 || (*sectorsize % pp->sectorsize) != 0) { gctl_error(req, "Invalid sector size."); return; } if (*sectorsize > PAGE_SIZE) { gctl_error(req, "warning: Using sectorsize bigger than " "the page size!"); } md.md_sectorsize = *sectorsize; } g_eli_create(req, mp, pp, &md, mkey, -1); explicit_bzero(mkey, sizeof(mkey)); explicit_bzero(&md, sizeof(md)); } static void g_eli_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; struct g_eli_metadata md; struct g_provider *pp; struct g_consumer *cp; char param[16]; const char *prov; u_char *sector; int *nargs, *boot, *noboot, *trim, *notrim, *geliboot, *nogeliboot; int *displaypass, *nodisplaypass, *autoresize, *noautoresize; int zero, error, changed; u_int i; g_topology_assert(); changed = 0; zero = 0; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } boot = gctl_get_paraml(req, "boot", sizeof(*boot)); if (boot == NULL) boot = &zero; noboot = gctl_get_paraml(req, "noboot", sizeof(*noboot)); if (noboot == NULL) noboot = &zero; if (*boot && *noboot) { gctl_error(req, "Options -b and -B are mutually exclusive."); return; } if (*boot || *noboot) changed = 1; trim = gctl_get_paraml(req, "trim", sizeof(*trim)); if (trim == NULL) trim = &zero; notrim = gctl_get_paraml(req, "notrim", sizeof(*notrim)); if (notrim == NULL) notrim = &zero; if (*trim && *notrim) { gctl_error(req, "Options -t and -T are mutually exclusive."); return; } if (*trim || *notrim) changed = 1; geliboot = gctl_get_paraml(req, "geliboot", sizeof(*geliboot)); if (geliboot == NULL) geliboot = &zero; nogeliboot = gctl_get_paraml(req, "nogeliboot", sizeof(*nogeliboot)); if (nogeliboot == NULL) nogeliboot = 
&zero; if (*geliboot && *nogeliboot) { gctl_error(req, "Options -g and -G are mutually exclusive."); return; } if (*geliboot || *nogeliboot) changed = 1; displaypass = gctl_get_paraml(req, "displaypass", sizeof(*displaypass)); if (displaypass == NULL) displaypass = &zero; nodisplaypass = gctl_get_paraml(req, "nodisplaypass", sizeof(*nodisplaypass)); if (nodisplaypass == NULL) nodisplaypass = &zero; if (*displaypass && *nodisplaypass) { gctl_error(req, "Options -d and -D are mutually exclusive."); return; } if (*displaypass || *nodisplaypass) changed = 1; autoresize = gctl_get_paraml(req, "autoresize", sizeof(*autoresize)); if (autoresize == NULL) autoresize = &zero; noautoresize = gctl_get_paraml(req, "noautoresize", sizeof(*noautoresize)); if (noautoresize == NULL) noautoresize = &zero; if (*autoresize && *noautoresize) { gctl_error(req, "Options -r and -R are mutually exclusive."); return; } if (*autoresize || *noautoresize) changed = 1; if (!changed) { gctl_error(req, "No option given."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); prov = gctl_get_asciiparam(req, param); if (prov == NULL) { gctl_error(req, "No 'arg%d' argument.", i); return; } sc = g_eli_find_device(mp, prov); if (sc == NULL) { /* * We ignore not attached providers, userland part will * take care of them. */ G_ELI_DEBUG(1, "Skipping configuration of not attached " "provider %s.", prov); continue; } if (sc->sc_flags & G_ELI_FLAG_RO) { gctl_error(req, "Cannot change configuration of " "read-only provider %s.", prov); continue; } if (*boot && (sc->sc_flags & G_ELI_FLAG_BOOT)) { G_ELI_DEBUG(1, "BOOT flag already configured for %s.", prov); continue; } else if (*noboot && !(sc->sc_flags & G_ELI_FLAG_BOOT)) { G_ELI_DEBUG(1, "BOOT flag not configured for %s.", prov); continue; } if (*notrim && (sc->sc_flags & G_ELI_FLAG_NODELETE)) { G_ELI_DEBUG(1, "TRIM disable flag already configured for %s.", prov); continue; } else if (*trim && !(sc->sc_flags & G_ELI_FLAG_NODELETE)) { G_ELI_DEBUG(1, "TRIM disable flag not configured for %s.", prov); continue; } if (*geliboot && (sc->sc_flags & G_ELI_FLAG_GELIBOOT)) { G_ELI_DEBUG(1, "GELIBOOT flag already configured for %s.", prov); continue; } else if (*nogeliboot && !(sc->sc_flags & G_ELI_FLAG_GELIBOOT)) { G_ELI_DEBUG(1, "GELIBOOT flag not configured for %s.", prov); continue; } if (*displaypass && (sc->sc_flags & G_ELI_FLAG_GELIDISPLAYPASS)) { G_ELI_DEBUG(1, "GELIDISPLAYPASS flag already configured for %s.", prov); continue; } else if (*nodisplaypass && !(sc->sc_flags & G_ELI_FLAG_GELIDISPLAYPASS)) { G_ELI_DEBUG(1, "GELIDISPLAYPASS flag not configured for %s.", prov); continue; } if (*autoresize && (sc->sc_flags & G_ELI_FLAG_AUTORESIZE)) { G_ELI_DEBUG(1, "AUTORESIZE flag already configured for %s.", prov); continue; } else if (*noautoresize && !(sc->sc_flags & G_ELI_FLAG_AUTORESIZE)) { G_ELI_DEBUG(1, "AUTORESIZE flag not configured for %s.", prov); continue; } if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) { /* * ONETIME providers don't write metadata to * disk, so don't try reading it. This means * we're bit-flipping uninitialized memory in md * below, but that's OK; we don't do anything * with it later. 
*/ cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", prov, error); continue; } } if (*boot) { md.md_flags |= G_ELI_FLAG_BOOT; sc->sc_flags |= G_ELI_FLAG_BOOT; } else if (*noboot) { md.md_flags &= ~G_ELI_FLAG_BOOT; sc->sc_flags &= ~G_ELI_FLAG_BOOT; } if (*notrim) { md.md_flags |= G_ELI_FLAG_NODELETE; sc->sc_flags |= G_ELI_FLAG_NODELETE; } else if (*trim) { md.md_flags &= ~G_ELI_FLAG_NODELETE; sc->sc_flags &= ~G_ELI_FLAG_NODELETE; } if (*geliboot) { md.md_flags |= G_ELI_FLAG_GELIBOOT; sc->sc_flags |= G_ELI_FLAG_GELIBOOT; } else if (*nogeliboot) { md.md_flags &= ~G_ELI_FLAG_GELIBOOT; sc->sc_flags &= ~G_ELI_FLAG_GELIBOOT; } if (*displaypass) { md.md_flags |= G_ELI_FLAG_GELIDISPLAYPASS; sc->sc_flags |= G_ELI_FLAG_GELIDISPLAYPASS; } else if (*nodisplaypass) { md.md_flags &= ~G_ELI_FLAG_GELIDISPLAYPASS; sc->sc_flags &= ~G_ELI_FLAG_GELIDISPLAYPASS; } if (*autoresize) { md.md_flags |= G_ELI_FLAG_AUTORESIZE; sc->sc_flags |= G_ELI_FLAG_AUTORESIZE; } else if (*noautoresize) { md.md_flags &= ~G_ELI_FLAG_AUTORESIZE; sc->sc_flags &= ~G_ELI_FLAG_AUTORESIZE; } if (sc->sc_flags & G_ELI_FLAG_ONETIME) { /* There's no metadata on disk so we are done here. */ continue; } sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); eli_metadata_encode(&md, sector); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { gctl_error(req, "Cannot store metadata on %s (error=%d).", prov, error); } explicit_bzero(&md, sizeof(md)); explicit_bzero(sector, pp->sectorsize); free(sector, M_ELI); } } static void g_eli_ctl_setkey(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; struct g_eli_metadata md; struct g_provider *pp; struct g_consumer *cp; const char *name; u_char *key, *mkeydst, *sector; intmax_t *valp; int keysize, nkey, error; g_topology_assert(); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } key = gctl_get_param(req, "key", &keysize); if (key == NULL || keysize != G_ELI_USERKEYLEN) { gctl_error(req, "No '%s' argument.", "key"); return; } sc = g_eli_find_device(mp, name); if (sc == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } if (sc->sc_flags & G_ELI_FLAG_RO) { gctl_error(req, "Cannot change keys for read-only provider."); return; } cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", name, error); return; } valp = gctl_get_paraml(req, "keyno", sizeof(*valp)); if (valp == NULL) { gctl_error(req, "No '%s' argument.", "keyno"); return; } if (*valp != -1) nkey = *valp; else nkey = sc->sc_nkey; if (nkey < 0 || nkey >= G_ELI_MAXMKEYS) { gctl_error(req, "Invalid '%s' argument.", "keyno"); return; } valp = gctl_get_paraml(req, "iterations", sizeof(*valp)); if (valp == NULL) { gctl_error(req, "No '%s' argument.", "iterations"); return; } /* Check if iterations number should and can be changed. 
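This handler and the setkey/delkey handlers below all finish the same way: encode the updated metadata into one sector-sized buffer, write it over the provider's last sector, and scrub the buffer (the kernel uses explicit_bzero(9) precisely so the scrub cannot be optimized away). A minimal sketch of that convention, with hypothetical callbacks standing in for eli_metadata_encode() and g_write_data():

    #include <stdint.h>
    #include <string.h>

    typedef void (*md_encode_fn)(const void *md, uint8_t *sector);
    typedef int (*sector_write_fn)(uint64_t offset, const uint8_t *buf,
        uint32_t len);

    int
    store_metadata_last_sector(const void *md, uint8_t *sector,
        uint64_t mediasize, uint32_t sectorsize,
        md_encode_fn encode, sector_write_fn write_sector)
    {
        int error;

        encode(md, sector);
        /* geli metadata always occupies the provider's final sector. */
        error = write_sector(mediasize - sectorsize, sector, sectorsize);
        /* Scrub: the buffer held (encrypted) master-key material. */
        memset(sector, 0, sectorsize);
        return (error);
    }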
*/ if (*valp != -1 && md.md_iterations == -1) { md.md_iterations = *valp; } else if (*valp != -1 && *valp != md.md_iterations) { if (bitcount32(md.md_keys) != 1) { gctl_error(req, "To be able to use '-i' option, only " "one key can be defined."); return; } if (md.md_keys != (1 << nkey)) { gctl_error(req, "Only already defined key can be " "changed when '-i' option is used."); return; } md.md_iterations = *valp; } mkeydst = md.md_mkeys + nkey * G_ELI_MKEYLEN; md.md_keys |= (1 << nkey); bcopy(sc->sc_mkey, mkeydst, sizeof(sc->sc_mkey)); /* Encrypt Master Key with the new key. */ error = g_eli_mkey_encrypt(md.md_ealgo, key, md.md_keylen, mkeydst); explicit_bzero(key, keysize); if (error != 0) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Cannot encrypt Master Key (error=%d).", error); return; } sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); /* Store metadata with fresh key. */ eli_metadata_encode(&md, sector); explicit_bzero(&md, sizeof(md)); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); explicit_bzero(sector, pp->sectorsize); free(sector, M_ELI); if (error != 0) { gctl_error(req, "Cannot store metadata on %s (error=%d).", pp->name, error); return; } G_ELI_DEBUG(1, "Key %u changed on %s.", nkey, pp->name); } static void g_eli_ctl_delkey(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; struct g_eli_metadata md; struct g_provider *pp; struct g_consumer *cp; const char *name; u_char *mkeydst, *sector; intmax_t *valp; size_t keysize; int error, nkey, *all, *force; u_int i; g_topology_assert(); nkey = 0; /* fixes causeless gcc warning */ name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } sc = g_eli_find_device(mp, name); if (sc == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } if (sc->sc_flags & G_ELI_FLAG_RO) { gctl_error(req, "Cannot delete keys for read-only provider."); return; } cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", name, error); return; } all = gctl_get_paraml(req, "all", sizeof(*all)); if (all == NULL) { gctl_error(req, "No '%s' argument.", "all"); return; } if (*all) { mkeydst = md.md_mkeys; keysize = sizeof(md.md_mkeys); } else { force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } valp = gctl_get_paraml(req, "keyno", sizeof(*valp)); if (valp == NULL) { gctl_error(req, "No '%s' argument.", "keyno"); return; } if (*valp != -1) nkey = *valp; else nkey = sc->sc_nkey; if (nkey < 0 || nkey >= G_ELI_MAXMKEYS) { gctl_error(req, "Invalid '%s' argument.", "keyno"); return; } if (!(md.md_keys & (1 << nkey)) && !*force) { gctl_error(req, "Master Key %u is not set.", nkey); return; } md.md_keys &= ~(1 << nkey); if (md.md_keys == 0 && !*force) { gctl_error(req, "This is the last Master Key. Use '-f' " "flag if you really want to remove it."); return; } mkeydst = md.md_mkeys + nkey * G_ELI_MKEYLEN; keysize = G_ELI_MKEYLEN; } sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); for (i = 0; i <= g_eli_overwrites; i++) { if (i == g_eli_overwrites) explicit_bzero(mkeydst, keysize); else arc4rand(mkeydst, keysize, 0); /* Store metadata with destroyed key. 
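The two checks above encode one rule: md_keys is a bitmask of populated master-key slots, and the PKCS#5v2 iteration count may only change while exactly one slot is populated and it is the slot being rewritten, because all slots share a single salt and iteration count. Restated as a standalone predicate (the kernel uses bitcount32(); the GCC/Clang builtin here is just the userland equivalent):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * md_keys: bitmask of populated master-key slots.
     * nkey:    the slot the caller wants to rewrite with new iterations.
     */
    bool
    can_change_iterations(uint32_t md_keys, unsigned int nkey)
    {
        return (__builtin_popcount(md_keys) == 1 &&
            md_keys == (1u << nkey));
    }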
*/ eli_metadata_encode(&md, sector); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { G_ELI_DEBUG(0, "Cannot store metadata on %s " "(error=%d).", pp->name, error); } /* * Flush write cache so we don't overwrite data N times in cache * and only once on disk. */ (void)g_io_flush(cp); } explicit_bzero(&md, sizeof(md)); explicit_bzero(sector, pp->sectorsize); free(sector, M_ELI); if (*all) G_ELI_DEBUG(1, "All keys removed from %s.", pp->name); else G_ELI_DEBUG(1, "Key %d removed from %s.", nkey, pp->name); } static void g_eli_suspend_one(struct g_eli_softc *sc, struct gctl_req *req) { struct g_eli_worker *wr; g_topology_assert(); KASSERT(sc != NULL, ("NULL sc")); if (sc->sc_flags & G_ELI_FLAG_ONETIME) { gctl_error(req, "Device %s is using one-time key, suspend not supported.", sc->sc_name); return; } mtx_lock(&sc->sc_queue_mtx); if (sc->sc_flags & G_ELI_FLAG_SUSPEND) { mtx_unlock(&sc->sc_queue_mtx); gctl_error(req, "Device %s already suspended.", sc->sc_name); return; } sc->sc_flags |= G_ELI_FLAG_SUSPEND; wakeup(sc); for (;;) { LIST_FOREACH(wr, &sc->sc_workers, w_next) { if (wr->w_active) break; } if (wr == NULL) break; /* Not all threads suspended. */ msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:suspend", 0); } /* * Clear sensitive data on suspend, they will be recovered on resume. */ explicit_bzero(sc->sc_mkey, sizeof(sc->sc_mkey)); g_eli_key_destroy(sc); explicit_bzero(sc->sc_akey, sizeof(sc->sc_akey)); explicit_bzero(&sc->sc_akeyctx, sizeof(sc->sc_akeyctx)); explicit_bzero(sc->sc_ivkey, sizeof(sc->sc_ivkey)); explicit_bzero(&sc->sc_ivctx, sizeof(sc->sc_ivctx)); mtx_unlock(&sc->sc_queue_mtx); G_ELI_DEBUG(0, "Device %s has been suspended.", sc->sc_name); } static void g_eli_ctl_suspend(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; int *all, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } all = gctl_get_paraml(req, "all", sizeof(*all)); if (all == NULL) { gctl_error(req, "No '%s' argument.", "all"); return; } if (!*all && *nargs == 0) { gctl_error(req, "Too few arguments."); return; } if (*all) { struct g_geom *gp, *gp2; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; if (sc->sc_flags & G_ELI_FLAG_ONETIME) { G_ELI_DEBUG(0, "Device %s is using one-time key, suspend not supported, skipping.", sc->sc_name); continue; } g_eli_suspend_one(sc, req); } } else { const char *prov; char param[16]; int i; for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); prov = gctl_get_asciiparam(req, param); if (prov == NULL) { G_ELI_DEBUG(0, "No 'arg%d' argument.", i); continue; } sc = g_eli_find_device(mp, prov); if (sc == NULL) { G_ELI_DEBUG(0, "No such provider: %s.", prov); continue; } g_eli_suspend_one(sc, req); } } } static void g_eli_ctl_resume(struct gctl_req *req, struct g_class *mp) { struct g_eli_metadata md; struct g_eli_softc *sc; struct g_provider *pp; struct g_consumer *cp; const char *name; u_char *key, mkey[G_ELI_DATAIVKEYLEN]; int *nargs, keysize, error; u_int nkey; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Invalid number of arguments."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } key = gctl_get_param(req, "key", &keysize); if (key == NULL 
|| keysize != G_ELI_USERKEYLEN) { gctl_error(req, "No '%s' argument.", "key"); return; } sc = g_eli_find_device(mp, name); if (sc == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", name, error); return; } if (md.md_keys == 0x00) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "No valid keys on %s.", pp->name); return; } error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey); explicit_bzero(key, keysize); if (error == -1) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Wrong key for %s.", pp->name); return; } else if (error > 0) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); return; } G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); mtx_lock(&sc->sc_queue_mtx); if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) gctl_error(req, "Device %s is not suspended.", name); else { /* Restore sc_mkey, sc_ekeys, sc_akey and sc_ivkey. */ g_eli_mkey_propagate(sc, mkey); sc->sc_flags &= ~G_ELI_FLAG_SUSPEND; G_ELI_DEBUG(1, "Resumed %s.", pp->name); wakeup(sc); } mtx_unlock(&sc->sc_queue_mtx); explicit_bzero(mkey, sizeof(mkey)); explicit_bzero(&md, sizeof(md)); } static int g_eli_kill_one(struct g_eli_softc *sc) { struct g_provider *pp; struct g_consumer *cp; int error = 0; g_topology_assert(); if (sc == NULL) return (ENOENT); pp = LIST_FIRST(&sc->sc_geom->provider); g_error_provider(pp, ENXIO); cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; if (sc->sc_flags & G_ELI_FLAG_RO) { G_ELI_DEBUG(0, "WARNING: Metadata won't be erased on read-only " "provider: %s.", pp->name); } else { u_char *sector; u_int i; int err; sector = malloc(pp->sectorsize, M_ELI, M_WAITOK); for (i = 0; i <= g_eli_overwrites; i++) { if (i == g_eli_overwrites) bzero(sector, pp->sectorsize); else arc4rand(sector, pp->sectorsize, 0); err = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (err != 0) { G_ELI_DEBUG(0, "Cannot erase metadata on %s " "(error=%d).", pp->name, err); if (error == 0) error = err; } /* * Flush write cache so we don't overwrite data N times * in cache and only once on disk. 
*/ (void)g_io_flush(cp); } free(sector, M_ELI); } if (error == 0) G_ELI_DEBUG(0, "%s has been killed.", pp->name); g_eli_destroy(sc, TRUE); return (error); } static void g_eli_ctl_kill(struct gctl_req *req, struct g_class *mp) { int *all, *nargs; int error; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } all = gctl_get_paraml(req, "all", sizeof(*all)); if (all == NULL) { gctl_error(req, "No '%s' argument.", "all"); return; } if (!*all && *nargs == 0) { gctl_error(req, "Too few arguments."); return; } if (*all) { struct g_geom *gp, *gp2; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { error = g_eli_kill_one(gp->softc); if (error != 0) gctl_error(req, "Not fully done."); } } else { struct g_eli_softc *sc; const char *prov; char param[16]; int i; for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); prov = gctl_get_asciiparam(req, param); if (prov == NULL) { G_ELI_DEBUG(0, "No 'arg%d' argument.", i); continue; } sc = g_eli_find_device(mp, prov); if (sc == NULL) { G_ELI_DEBUG(0, "No such provider: %s.", prov); continue; } error = g_eli_kill_one(sc); if (error != 0) gctl_error(req, "Not fully done."); } } } void g_eli_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } while (*version != G_ELI_VERSION) { if (G_ELI_VERSION == G_ELI_VERSION_06 && *version == G_ELI_VERSION_05) { /* Compatible. */ break; } if (G_ELI_VERSION == G_ELI_VERSION_07 && (*version == G_ELI_VERSION_05 || *version == G_ELI_VERSION_06)) { /* Compatible. */ break; } gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "attach") == 0) g_eli_ctl_attach(req, mp); else if (strcmp(verb, "detach") == 0 || strcmp(verb, "stop") == 0) g_eli_ctl_detach(req, mp); else if (strcmp(verb, "onetime") == 0) g_eli_ctl_onetime(req, mp); else if (strcmp(verb, "configure") == 0) g_eli_ctl_configure(req, mp); else if (strcmp(verb, "setkey") == 0) g_eli_ctl_setkey(req, mp); else if (strcmp(verb, "delkey") == 0) g_eli_ctl_delkey(req, mp); else if (strcmp(verb, "suspend") == 0) g_eli_ctl_suspend(req, mp); else if (strcmp(verb, "resume") == 0) g_eli_ctl_resume(req, mp); else if (strcmp(verb, "kill") == 0) g_eli_ctl_kill(req, mp); else gctl_error(req, "Unknown verb."); } Index: head/sys/geom/eli/g_eli_integrity.c =================================================================== --- head/sys/geom/eli/g_eli_integrity.c (revision 350693) +++ head/sys/geom/eli/g_eli_integrity.c (revision 350694) @@ -1,540 +1,541 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2011 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include /* * The data layout description when integrity verification is configured. * * One of the most important assumption here is that authenticated data and its * HMAC has to be stored in the same place (namely in the same sector) to make * it work reliable. * The problem is that file systems work only with sectors that are multiple of * 512 bytes and a power of two number. * My idea to implement it is as follows. * Let's store HMAC in sector. This is a must. This leaves us 480 bytes for * data. We can't use that directly (ie. we can't create provider with 480 bytes * sector size). We need another sector from where we take only 32 bytes of data * and we store HMAC of this data as well. This takes two sectors from the * original provider at the input and leaves us one sector of authenticated data * at the output. Not very efficient, but you got the idea. * Now, let's assume, we want to create provider with 4096 bytes sector. * To output 4096 bytes of authenticated data we need 8x480 plus 1x256, so we * need nine 512-bytes sectors at the input to get one 4096-bytes sector at the * output. That's better. With 4096 bytes sector we can use 89% of size of the * original provider. I find it as an acceptable cost. * The reliability comes from the fact, that every HMAC stored inside the sector * is calculated only for the data in the same sector, so its impossible to * write new data and leave old HMAC or vice versa. * * And here is the picture: * * da0: +----+----+ +----+----+ +----+----+ +----+----+ +----+----+ +----+----+ +----+----+ +----+----+ +----+-----+ * |32b |480b| |32b |480b| |32b |480b| |32b |480b| |32b |480b| |32b |480b| |32b |480b| |32b |480b| |32b |256b | * |HMAC|Data| |HMAC|Data| |HMAC|Data| |HMAC|Data| |HMAC|Data| |HMAC|Data| |HMAC|Data| |HMAC|Data| |HMAC|Data | * +----+----+ +----+----+ +----+----+ +----+----+ +----+----+ +----+----+ +----+----+ +----+----+ +----+-----+ * |512 bytes| |512 bytes| |512 bytes| |512 bytes| |512 bytes| |512 bytes| |512 bytes| |512 bytes| |288 bytes | * +---------+ +---------+ +---------+ +---------+ +---------+ +---------+ +---------+ +---------+ |224 unused| * +----------+ * da0.eli: +----+----+----+----+----+----+----+----+----+ * |480b|480b|480b|480b|480b|480b|480b|480b|256b| * +----+----+----+----+----+----+----+----+----+ * | 4096 bytes | * +--------------------------------------------+ * * PS. You can use any sector size with geli(8). My example is using 4kB, * because it's most efficient. For 8kB sectors you need 2 extra sectors, * so the cost is the same as for 4kB sectors. 
*/ /* * Code paths: * BIO_READ: * g_eli_start -> g_eli_auth_read -> g_io_request -> g_eli_read_done -> g_eli_auth_run -> g_eli_auth_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> g_eli_auth_run -> g_eli_auth_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ MALLOC_DECLARE(M_ELI); /* * Here we generate key for HMAC. Every sector has its own HMAC key, so it is * not possible to copy sectors. * We cannot depend on fact, that every sector has its own IV, because different * IV doesn't change HMAC, when we use encrypt-then-authenticate method. */ static void g_eli_auth_keygen(struct g_eli_softc *sc, off_t offset, u_char *key) { SHA256_CTX ctx; /* Copy precalculated SHA256 context. */ bcopy(&sc->sc_akeyctx, &ctx, sizeof(ctx)); SHA256_Update(&ctx, (uint8_t *)&offset, sizeof(offset)); SHA256_Final(key, &ctx); } /* * The function is called after we read and decrypt data. * * g_eli_start -> g_eli_auth_read -> g_io_request -> g_eli_read_done -> g_eli_auth_run -> G_ELI_AUTH_READ_DONE -> g_io_deliver */ static int g_eli_auth_read_done(struct cryptop *crp) { struct g_eli_softc *sc; struct bio *bp; if (crp->crp_etype == EAGAIN) { if (g_eli_crypto_rerun(crp) == 0) return (0); } bp = (struct bio *)crp->crp_opaque; bp->bio_inbed++; if (crp->crp_etype == 0) { bp->bio_completed += crp->crp_olen; G_ELI_DEBUG(3, "Crypto READ request done (%d/%d) (add=%jd completed=%jd).", bp->bio_inbed, bp->bio_children, (intmax_t)crp->crp_olen, (intmax_t)bp->bio_completed); } else { G_ELI_DEBUG(1, "Crypto READ request failed (%d/%d) error=%d.", bp->bio_inbed, bp->bio_children, crp->crp_etype); if (bp->bio_error == 0) bp->bio_error = crp->crp_etype; } sc = bp->bio_to->geom->softc; g_eli_key_drop(sc, crp->crp_desc->crd_next->crd_key); /* * Do we have all sectors already? */ if (bp->bio_inbed < bp->bio_children) return (0); if (bp->bio_error == 0) { u_int i, lsec, nsec, data_secsize, decr_secsize, encr_secsize; u_char *srcdata, *dstdata, *auth; off_t coroff, corsize; /* * Verify data integrity based on calculated and read HMACs. */ /* Sectorsize of decrypted provider eg. 4096. */ decr_secsize = bp->bio_to->sectorsize; /* The real sectorsize of encrypted provider, eg. 512. */ encr_secsize = LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize; /* Number of data bytes in one encrypted sector, eg. 480. */ data_secsize = sc->sc_data_per_sector; /* Number of sectors from decrypted provider, eg. 2. */ nsec = bp->bio_length / decr_secsize; /* Number of sectors from encrypted provider, eg. 18. */ nsec = (nsec * sc->sc_bytes_per_sector) / encr_secsize; /* Last sector number in every big sector, eg. 9. */ lsec = sc->sc_bytes_per_sector / encr_secsize; srcdata = bp->bio_driver2; dstdata = bp->bio_data; auth = srcdata + encr_secsize * nsec; coroff = -1; corsize = 0; for (i = 1; i <= nsec; i++) { data_secsize = sc->sc_data_per_sector; if ((i % lsec) == 0) data_secsize = decr_secsize % data_secsize; if (bcmp(srcdata, auth, sc->sc_alen) != 0) { /* * Curruption detected, remember the offset if * this is the first corrupted sector and * increase size. */ if (bp->bio_error == 0) bp->bio_error = -1; if (coroff == -1) { coroff = bp->bio_offset + (dstdata - (u_char *)bp->bio_data); } corsize += data_secsize; } else { /* * No curruption, good. * Report previous corruption if there was one. 
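The layout comment above works this arithmetic out for the 4 kB example; the same accounting as a standalone program, with the per-sector HMAC length taken as 32 bytes (HMAC/SHA256) and the rounding of the data area to a 16-byte multiple assumed to match the cipher block size:

    #include <stdio.h>

    int
    main(void)
    {
        unsigned encr_secsize = 512;	/* raw provider sector */
        unsigned decr_secsize = 4096;	/* .eli provider sector */
        unsigned alen = 32;		/* per-sector HMAC length */
        unsigned data_per_sector, nsec, bytes_per_sector, tail, unused;

        data_per_sector = encr_secsize - alen;			/* 480 */
        data_per_sector -= data_per_sector % 16;		/* stay block-aligned */
        nsec = (decr_secsize + data_per_sector - 1) / data_per_sector; /* 9 */
        bytes_per_sector = nsec * encr_secsize;			/* 4608 */
        tail = decr_secsize % data_per_sector;			/* 256 */
        unused = encr_secsize - alen - tail;			/* 224 */

        printf("%u raw sectors per %u-byte sector, %u data bytes in the last "
            "one (%u unused), %.1f%% usable\n", nsec, decr_secsize, tail,
            unused, 100.0 * decr_secsize / bytes_per_sector);
        return (0);
    }

It prints "9 raw sectors per 4096-byte sector, 256 data bytes in the last one (224 unused), 88.9% usable", matching the picture and the sc_data_per_sector/sc_bytes_per_sector values the read and write paths below rely on.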
*/ if (coroff != -1) { G_ELI_DEBUG(0, "%s: Failed to authenticate %jd " "bytes of data at offset %jd.", sc->sc_name, (intmax_t)corsize, (intmax_t)coroff); coroff = -1; corsize = 0; } bcopy(srcdata + sc->sc_alen, dstdata, data_secsize); } srcdata += encr_secsize; dstdata += data_secsize; auth += sc->sc_alen; } /* Report previous corruption if there was one. */ if (coroff != -1) { G_ELI_DEBUG(0, "%s: Failed to authenticate %jd " "bytes of data at offset %jd.", sc->sc_name, (intmax_t)corsize, (intmax_t)coroff); } } free(bp->bio_driver2, M_ELI); bp->bio_driver2 = NULL; if (bp->bio_error != 0) { if (bp->bio_error == -1) bp->bio_error = EINVAL; else { G_ELI_LOGREQ(0, bp, "Crypto READ request failed (error=%d).", bp->bio_error); } bp->bio_completed = 0; } /* * Read is finished, send it up. */ g_io_deliver(bp, bp->bio_error); atomic_subtract_int(&sc->sc_inflight, 1); return (0); } /* * The function is called after data encryption. * * g_eli_start -> g_eli_auth_run -> G_ELI_AUTH_WRITE_DONE -> g_io_request -> g_eli_write_done -> g_io_deliver */ static int g_eli_auth_write_done(struct cryptop *crp) { struct g_eli_softc *sc; struct g_consumer *cp; struct bio *bp, *cbp, *cbp2; u_int nsec; if (crp->crp_etype == EAGAIN) { if (g_eli_crypto_rerun(crp) == 0) return (0); } bp = (struct bio *)crp->crp_opaque; bp->bio_inbed++; if (crp->crp_etype == 0) { G_ELI_DEBUG(3, "Crypto WRITE request done (%d/%d).", bp->bio_inbed, bp->bio_children); } else { G_ELI_DEBUG(1, "Crypto WRITE request failed (%d/%d) error=%d.", bp->bio_inbed, bp->bio_children, crp->crp_etype); if (bp->bio_error == 0) bp->bio_error = crp->crp_etype; } sc = bp->bio_to->geom->softc; g_eli_key_drop(sc, crp->crp_desc->crd_key); /* * All sectors are already encrypted? */ if (bp->bio_inbed < bp->bio_children) return (0); if (bp->bio_error != 0) { G_ELI_LOGREQ(0, bp, "Crypto WRITE request failed (error=%d).", bp->bio_error); free(bp->bio_driver2, M_ELI); bp->bio_driver2 = NULL; cbp = bp->bio_driver1; bp->bio_driver1 = NULL; g_destroy_bio(cbp); g_io_deliver(bp, bp->bio_error); atomic_subtract_int(&sc->sc_inflight, 1); return (0); } cp = LIST_FIRST(&sc->sc_geom->consumer); cbp = bp->bio_driver1; bp->bio_driver1 = NULL; cbp->bio_to = cp->provider; cbp->bio_done = g_eli_write_done; /* Number of sectors from decrypted provider, eg. 1. */ nsec = bp->bio_length / bp->bio_to->sectorsize; /* Number of sectors from encrypted provider, eg. 9. */ nsec = (nsec * sc->sc_bytes_per_sector) / cp->provider->sectorsize; cbp->bio_length = cp->provider->sectorsize * nsec; cbp->bio_offset = (bp->bio_offset / bp->bio_to->sectorsize) * sc->sc_bytes_per_sector; cbp->bio_data = bp->bio_driver2; /* * We write more than what is requested, so we have to be ready to write * more than MAXPHYS. */ cbp2 = NULL; if (cbp->bio_length > MAXPHYS) { cbp2 = g_duplicate_bio(bp); cbp2->bio_length = cbp->bio_length - MAXPHYS; cbp2->bio_data = cbp->bio_data + MAXPHYS; cbp2->bio_offset = cbp->bio_offset + MAXPHYS; cbp2->bio_to = cp->provider; cbp2->bio_done = g_eli_write_done; cbp->bio_length = MAXPHYS; } /* * Send encrypted data to the provider. */ G_ELI_LOGREQ(2, cbp, "Sending request."); bp->bio_inbed = 0; bp->bio_children = (cbp2 != NULL ? 
2 : 1); g_io_request(cbp, cp); if (cbp2 != NULL) { G_ELI_LOGREQ(2, cbp2, "Sending request."); g_io_request(cbp2, cp); } return (0); } void g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp) { struct g_consumer *cp; struct bio *cbp, *cbp2; size_t size; off_t nsec; bp->bio_pflags = 0; cp = LIST_FIRST(&sc->sc_geom->consumer); cbp = bp->bio_driver1; bp->bio_driver1 = NULL; cbp->bio_to = cp->provider; cbp->bio_done = g_eli_read_done; /* Number of sectors from decrypted provider, eg. 1. */ nsec = bp->bio_length / bp->bio_to->sectorsize; /* Number of sectors from encrypted provider, eg. 9. */ nsec = (nsec * sc->sc_bytes_per_sector) / cp->provider->sectorsize; cbp->bio_length = cp->provider->sectorsize * nsec; size = cbp->bio_length; size += sc->sc_alen * nsec; size += sizeof(struct cryptop) * nsec; size += sizeof(struct cryptodesc) * nsec * 2; size += G_ELI_AUTH_SECKEYLEN * nsec; cbp->bio_offset = (bp->bio_offset / bp->bio_to->sectorsize) * sc->sc_bytes_per_sector; bp->bio_driver2 = malloc(size, M_ELI, M_WAITOK); cbp->bio_data = bp->bio_driver2; /* * We read more than what is requested, so we have to be ready to read * more than MAXPHYS. */ cbp2 = NULL; if (cbp->bio_length > MAXPHYS) { cbp2 = g_duplicate_bio(bp); cbp2->bio_length = cbp->bio_length - MAXPHYS; cbp2->bio_data = cbp->bio_data + MAXPHYS; cbp2->bio_offset = cbp->bio_offset + MAXPHYS; cbp2->bio_to = cp->provider; cbp2->bio_done = g_eli_read_done; cbp->bio_length = MAXPHYS; } /* * Read encrypted data from provider. */ G_ELI_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); if (cbp2 != NULL) { G_ELI_LOGREQ(2, cbp2, "Sending request."); g_io_request(cbp2, cp); } } /* * This is the main function responsible for cryptography (ie. communication * with crypto(9) subsystem). * * BIO_READ: * g_eli_start -> g_eli_auth_read -> g_io_request -> g_eli_read_done -> G_ELI_AUTH_RUN -> g_eli_auth_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> G_ELI_AUTH_RUN -> g_eli_auth_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ void g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp) { struct g_eli_softc *sc; struct cryptop *crp; struct cryptodesc *crde, *crda; u_int i, lsec, nsec, data_secsize, decr_secsize, encr_secsize; off_t dstoff; u_char *p, *data, *auth, *authkey, *plaindata; int error; G_ELI_LOGREQ(3, bp, "%s", __func__); bp->bio_pflags = wr->w_number; sc = wr->w_softc; /* Sectorsize of decrypted provider eg. 4096. */ decr_secsize = bp->bio_to->sectorsize; /* The real sectorsize of encrypted provider, eg. 512. */ encr_secsize = LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize; /* Number of data bytes in one encrypted sector, eg. 480. */ data_secsize = sc->sc_data_per_sector; /* Number of sectors from decrypted provider, eg. 2. */ nsec = bp->bio_length / decr_secsize; /* Number of sectors from encrypted provider, eg. 18. */ nsec = (nsec * sc->sc_bytes_per_sector) / encr_secsize; /* Last sector number in every big sector, eg. 9. */ lsec = sc->sc_bytes_per_sector / encr_secsize; /* Destination offset, used for IV generation. */ dstoff = (bp->bio_offset / bp->bio_to->sectorsize) * sc->sc_bytes_per_sector; auth = NULL; /* Silence compiler warning. 
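Because each 4096-byte sector of authenticated data occupies nine 512-byte sectors (4608 bytes) on the raw provider, both the write path and the read path above rescale the child bio's offset and length, and a request that fits in MAXPHYS on the .eli device can exceed it underneath, which is what the cbp/cbp2 split handles. A small worked example of that scaling (the MAXPHYS value is an assumption about a typical kernel configuration):

    #include <stdio.h>

    #define ASSUMED_MAXPHYS	(128ULL * 1024)	/* assumption: typical MAXPHYS */

    int
    main(void)
    {
        /* A 128 kB read at offset 64 kB on a 4096-byte .eli provider. */
        unsigned long long bio_offset = 65536, bio_length = 131072;
        unsigned long long decr_secsize = 4096, encr_secsize = 512;
        unsigned long long bytes_per_sector = 4608;	/* 9 x 512, see above */
        unsigned long long nsec, clen, coff;

        nsec = bio_length / decr_secsize;		/* 32 */
        nsec = nsec * bytes_per_sector / encr_secsize;	/* 288 */
        clen = nsec * encr_secsize;			/* 147456 */
        coff = bio_offset / decr_secsize * bytes_per_sector; /* 73728 */

        printf("raw I/O: %llu bytes at offset %llu\n", clen, coff);
        if (clen > ASSUMED_MAXPHYS) {
            printf("split: %llu at %llu, then %llu at %llu\n",
                ASSUMED_MAXPHYS, coff, clen - ASSUMED_MAXPHYS,
                coff + ASSUMED_MAXPHYS);
        }
        return (0);
    }

The 128 kB request becomes 147456 bytes of raw I/O at offset 73728, split into 131072 + 16384 bytes.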
*/ plaindata = bp->bio_data; if (bp->bio_cmd == BIO_READ) { data = bp->bio_driver2; auth = data + encr_secsize * nsec; p = auth + sc->sc_alen * nsec; } else { size_t size; size = encr_secsize * nsec; size += sizeof(*crp) * nsec; size += sizeof(*crde) * nsec; size += sizeof(*crda) * nsec; size += G_ELI_AUTH_SECKEYLEN * nsec; size += sizeof(uintptr_t); /* Space for alignment. */ data = malloc(size, M_ELI, M_WAITOK); bp->bio_driver2 = data; p = data + encr_secsize * nsec; } bp->bio_inbed = 0; bp->bio_children = nsec; #if defined(__mips_n64) || defined(__mips_o64) p = (char *)roundup((uintptr_t)p, sizeof(uintptr_t)); #endif for (i = 1; i <= nsec; i++, dstoff += encr_secsize) { crp = (struct cryptop *)p; p += sizeof(*crp); crde = (struct cryptodesc *)p; p += sizeof(*crde); crda = (struct cryptodesc *)p; p += sizeof(*crda); authkey = (u_char *)p; p += G_ELI_AUTH_SECKEYLEN; data_secsize = sc->sc_data_per_sector; if ((i % lsec) == 0) { data_secsize = decr_secsize % data_secsize; /* * Last encrypted sector of each decrypted sector is * only partially filled. */ if (bp->bio_cmd == BIO_WRITE) memset(data + sc->sc_alen + data_secsize, 0, encr_secsize - sc->sc_alen - data_secsize); } if (bp->bio_cmd == BIO_READ) { /* Remember read HMAC. */ bcopy(data, auth, sc->sc_alen); auth += sc->sc_alen; /* TODO: bzero(9) can be commented out later. */ bzero(data, sc->sc_alen); } else { bcopy(plaindata, data + sc->sc_alen, data_secsize); plaindata += data_secsize; } crp->crp_session = wr->w_sid; crp->crp_ilen = sc->sc_alen + data_secsize; crp->crp_olen = data_secsize; crp->crp_opaque = (void *)bp; crp->crp_buf = (void *)data; data += encr_secsize; crp->crp_flags = CRYPTO_F_CBIFSYNC; if (g_eli_batch) crp->crp_flags |= CRYPTO_F_BATCH; if (bp->bio_cmd == BIO_WRITE) { crp->crp_callback = g_eli_auth_write_done; crp->crp_desc = crde; crde->crd_next = crda; crda->crd_next = NULL; } else { crp->crp_callback = g_eli_auth_read_done; crp->crp_desc = crda; crda->crd_next = crde; crde->crd_next = NULL; } crde->crd_skip = sc->sc_alen; crde->crd_len = data_secsize; crde->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) == 0) crde->crd_flags |= CRD_F_KEY_EXPLICIT; if (bp->bio_cmd == BIO_WRITE) crde->crd_flags |= CRD_F_ENCRYPT; crde->crd_alg = sc->sc_ealgo; crde->crd_key = g_eli_key_hold(sc, dstoff, encr_secsize); crde->crd_klen = sc->sc_ekeylen; if (sc->sc_ealgo == CRYPTO_AES_XTS) crde->crd_klen <<= 1; g_eli_crypto_ivgen(sc, dstoff, crde->crd_iv, sizeof(crde->crd_iv)); crda->crd_skip = sc->sc_alen; crda->crd_len = data_secsize; crda->crd_inject = 0; crda->crd_flags = CRD_F_KEY_EXPLICIT; crda->crd_alg = sc->sc_aalgo; g_eli_auth_keygen(sc, dstoff, authkey); crda->crd_key = authkey; crda->crd_klen = G_ELI_AUTH_SECKEYLEN * 8; crp->crp_etype = 0; error = crypto_dispatch(crp); KASSERT(error == 0, ("crypto_dispatch() failed (error=%d)", error)); } } Index: head/sys/geom/eli/g_eli_privacy.c =================================================================== --- head/sys/geom/eli/g_eli_privacy.c (revision 350693) +++ head/sys/geom/eli/g_eli_privacy.c (revision 350694) @@ -1,318 +1,319 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2011 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include /* * Code paths: * BIO_READ: * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ MALLOC_DECLARE(M_ELI); /* * The function is called after we read and decrypt data. * * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> G_ELI_CRYPTO_READ_DONE -> g_io_deliver */ static int g_eli_crypto_read_done(struct cryptop *crp) { struct g_eli_softc *sc; struct bio *bp; if (crp->crp_etype == EAGAIN) { if (g_eli_crypto_rerun(crp) == 0) return (0); } bp = (struct bio *)crp->crp_opaque; bp->bio_inbed++; if (crp->crp_etype == 0) { G_ELI_DEBUG(3, "Crypto READ request done (%d/%d).", bp->bio_inbed, bp->bio_children); bp->bio_completed += crp->crp_olen; } else { G_ELI_DEBUG(1, "Crypto READ request failed (%d/%d) error=%d.", bp->bio_inbed, bp->bio_children, crp->crp_etype); if (bp->bio_error == 0) bp->bio_error = crp->crp_etype; } sc = bp->bio_to->geom->softc; if (sc != NULL) g_eli_key_drop(sc, crp->crp_desc->crd_key); /* * Do we have all sectors already? */ if (bp->bio_inbed < bp->bio_children) return (0); free(bp->bio_driver2, M_ELI); bp->bio_driver2 = NULL; if (bp->bio_error != 0) { G_ELI_LOGREQ(0, bp, "Crypto READ request failed (error=%d).", bp->bio_error); bp->bio_completed = 0; } /* * Read is finished, send it up. */ g_io_deliver(bp, bp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); return (0); } /* * The function is called after data encryption. 
* * g_eli_start -> g_eli_crypto_run -> G_ELI_CRYPTO_WRITE_DONE -> g_io_request -> g_eli_write_done -> g_io_deliver */ static int g_eli_crypto_write_done(struct cryptop *crp) { struct g_eli_softc *sc; struct g_geom *gp; struct g_consumer *cp; struct bio *bp, *cbp; if (crp->crp_etype == EAGAIN) { if (g_eli_crypto_rerun(crp) == 0) return (0); } bp = (struct bio *)crp->crp_opaque; bp->bio_inbed++; if (crp->crp_etype == 0) { G_ELI_DEBUG(3, "Crypto WRITE request done (%d/%d).", bp->bio_inbed, bp->bio_children); } else { G_ELI_DEBUG(1, "Crypto WRITE request failed (%d/%d) error=%d.", bp->bio_inbed, bp->bio_children, crp->crp_etype); if (bp->bio_error == 0) bp->bio_error = crp->crp_etype; } gp = bp->bio_to->geom; sc = gp->softc; g_eli_key_drop(sc, crp->crp_desc->crd_key); /* * All sectors are already encrypted? */ if (bp->bio_inbed < bp->bio_children) return (0); bp->bio_inbed = 0; bp->bio_children = 1; cbp = bp->bio_driver1; bp->bio_driver1 = NULL; if (bp->bio_error != 0) { G_ELI_LOGREQ(0, bp, "Crypto WRITE request failed (error=%d).", bp->bio_error); free(bp->bio_driver2, M_ELI); bp->bio_driver2 = NULL; g_destroy_bio(cbp); g_io_deliver(bp, bp->bio_error); atomic_subtract_int(&sc->sc_inflight, 1); return (0); } cbp->bio_data = bp->bio_driver2; cbp->bio_done = g_eli_write_done; cp = LIST_FIRST(&gp->consumer); cbp->bio_to = cp->provider; G_ELI_LOGREQ(2, cbp, "Sending request."); /* * Send encrypted data to the provider. */ g_io_request(cbp, cp); return (0); } /* * The function is called to read encrypted data. * * g_eli_start -> G_ELI_CRYPTO_READ -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver */ void g_eli_crypto_read(struct g_eli_softc *sc, struct bio *bp, boolean_t fromworker) { struct g_consumer *cp; struct bio *cbp; if (!fromworker) { /* * We are not called from the worker thread, so check if * device is suspended. */ mtx_lock(&sc->sc_queue_mtx); if (sc->sc_flags & G_ELI_FLAG_SUSPEND) { /* * If device is suspended, we place the request onto * the queue, so it can be handled after resume. */ G_ELI_DEBUG(0, "device suspended, move onto queue"); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); return; } atomic_add_int(&sc->sc_inflight, 1); mtx_unlock(&sc->sc_queue_mtx); } bp->bio_pflags = 0; bp->bio_driver2 = NULL; cbp = bp->bio_driver1; cbp->bio_done = g_eli_read_done; cp = LIST_FIRST(&sc->sc_geom->consumer); cbp->bio_to = cp->provider; G_ELI_LOGREQ(2, cbp, "Sending request."); /* * Read encrypted data from provider. */ g_io_request(cbp, cp); } /* * This is the main function responsible for cryptography (ie. communication * with crypto(9) subsystem). * * BIO_READ: * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> G_ELI_CRYPTO_RUN -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> G_ELI_CRYPTO_RUN -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp) { struct g_eli_softc *sc; struct cryptop *crp; struct cryptodesc *crd; u_int i, nsec, secsize; off_t dstoff; size_t size; u_char *p, *data; int error; G_ELI_LOGREQ(3, bp, "%s", __func__); bp->bio_pflags = wr->w_number; sc = wr->w_softc; secsize = LIST_FIRST(&sc->sc_geom->provider)->sectorsize; nsec = bp->bio_length / secsize; /* * Calculate how much memory do we need. * We need separate crypto operation for every single sector. 
* It is much faster to calculate total amount of needed memory here and * do the allocation once instead of allocating memory in pieces (many, * many pieces). */ size = sizeof(*crp) * nsec; size += sizeof(*crd) * nsec; /* * If we write the data we cannot destroy current bio_data content, * so we need to allocate more memory for encrypted data. */ if (bp->bio_cmd == BIO_WRITE) size += bp->bio_length; p = malloc(size, M_ELI, M_WAITOK); bp->bio_inbed = 0; bp->bio_children = nsec; bp->bio_driver2 = p; if (bp->bio_cmd == BIO_READ) data = bp->bio_data; else { data = p; p += bp->bio_length; bcopy(bp->bio_data, data, bp->bio_length); } for (i = 0, dstoff = bp->bio_offset; i < nsec; i++, dstoff += secsize) { crp = (struct cryptop *)p; p += sizeof(*crp); crd = (struct cryptodesc *)p; p += sizeof(*crd); crp->crp_session = wr->w_sid; crp->crp_ilen = secsize; crp->crp_olen = secsize; crp->crp_opaque = (void *)bp; crp->crp_buf = (void *)data; data += secsize; if (bp->bio_cmd == BIO_WRITE) crp->crp_callback = g_eli_crypto_write_done; else /* if (bp->bio_cmd == BIO_READ) */ crp->crp_callback = g_eli_crypto_read_done; crp->crp_flags = CRYPTO_F_CBIFSYNC; if (g_eli_batch) crp->crp_flags |= CRYPTO_F_BATCH; crp->crp_desc = crd; crd->crd_skip = 0; crd->crd_len = secsize; crd->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) == 0) crd->crd_flags |= CRD_F_KEY_EXPLICIT; if (bp->bio_cmd == BIO_WRITE) crd->crd_flags |= CRD_F_ENCRYPT; crd->crd_alg = sc->sc_ealgo; crd->crd_key = g_eli_key_hold(sc, dstoff, secsize); crd->crd_klen = sc->sc_ekeylen; if (sc->sc_ealgo == CRYPTO_AES_XTS) crd->crd_klen <<= 1; g_eli_crypto_ivgen(sc, dstoff, crd->crd_iv, sizeof(crd->crd_iv)); crd->crd_next = NULL; crp->crp_etype = 0; error = crypto_dispatch(crp); KASSERT(error == 0, ("crypto_dispatch() failed (error=%d)", error)); } } Index: head/sys/geom/gate/g_gate.c =================================================================== --- head/sys/geom/gate/g_gate.c (revision 350693) +++ head/sys/geom/gate/g_gate.c (revision 350694) @@ -1,967 +1,968 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * Copyright (c) 2009-2010 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Pawel Jakub Dawidek * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(geom_gate, "GEOM Gate module"); static MALLOC_DEFINE(M_GATE, "gg_data", "GEOM Gate Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, gate, CTLFLAG_RW, 0, "GEOM_GATE configuration"); static int g_gate_debug = 0; SYSCTL_INT(_kern_geom_gate, OID_AUTO, debug, CTLFLAG_RWTUN, &g_gate_debug, 0, "Debug level"); static u_int g_gate_maxunits = 256; SYSCTL_UINT(_kern_geom_gate, OID_AUTO, maxunits, CTLFLAG_RDTUN, &g_gate_maxunits, 0, "Maximum number of ggate devices"); struct g_class g_gate_class = { .name = G_GATE_CLASS_NAME, .version = G_VERSION, }; static struct cdev *status_dev; static d_ioctl_t g_gate_ioctl; static struct cdevsw g_gate_cdevsw = { .d_version = D_VERSION, .d_ioctl = g_gate_ioctl, .d_name = G_GATE_CTL_NAME }; static struct g_gate_softc **g_gate_units; static u_int g_gate_nunits; static struct mtx g_gate_units_lock; static int g_gate_destroy(struct g_gate_softc *sc, boolean_t force) { struct bio_queue_head queue; struct g_provider *pp; struct g_consumer *cp; struct g_geom *gp; struct bio *bp; g_topology_assert(); mtx_assert(&g_gate_units_lock, MA_OWNED); pp = sc->sc_provider; if (!force && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { mtx_unlock(&g_gate_units_lock); return (EBUSY); } mtx_unlock(&g_gate_units_lock); mtx_lock(&sc->sc_queue_mtx); if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0) sc->sc_flags |= G_GATE_FLAG_DESTROY; wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); gp = pp->geom; g_wither_provider(pp, ENXIO); callout_drain(&sc->sc_callout); bioq_init(&queue); mtx_lock(&sc->sc_queue_mtx); while ((bp = bioq_takefirst(&sc->sc_inqueue)) != NULL) { sc->sc_queue_count--; bioq_insert_tail(&queue, bp); } while ((bp = bioq_takefirst(&sc->sc_outqueue)) != NULL) { sc->sc_queue_count--; bioq_insert_tail(&queue, bp); } mtx_unlock(&sc->sc_queue_mtx); g_topology_unlock(); while ((bp = bioq_takefirst(&queue)) != NULL) { G_GATE_LOGREQ(1, bp, "Request canceled."); g_io_deliver(bp, ENXIO); } mtx_lock(&g_gate_units_lock); /* One reference is ours. 
*/ sc->sc_ref--; while (sc->sc_ref > 0) msleep(&sc->sc_ref, &g_gate_units_lock, 0, "gg:destroy", 0); g_gate_units[sc->sc_unit] = NULL; KASSERT(g_gate_nunits > 0, ("negative g_gate_nunits?")); g_gate_nunits--; mtx_unlock(&g_gate_units_lock); mtx_destroy(&sc->sc_queue_mtx); g_topology_lock(); if ((cp = sc->sc_readcons) != NULL) { sc->sc_readcons = NULL; (void)g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); } G_GATE_DEBUG(1, "Device %s destroyed.", gp->name); gp->softc = NULL; g_wither_geom(gp, ENXIO); sc->sc_provider = NULL; free(sc, M_GATE); return (0); } static int g_gate_access(struct g_provider *pp, int dr, int dw, int de) { struct g_gate_softc *sc; if (dr <= 0 && dw <= 0 && de <= 0) return (0); sc = pp->geom->softc; if (sc == NULL || (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) return (ENXIO); /* XXX: Hack to allow read-only mounts. */ #if 0 if ((sc->sc_flags & G_GATE_FLAG_READONLY) != 0 && dw > 0) return (EPERM); #endif if ((sc->sc_flags & G_GATE_FLAG_WRITEONLY) != 0 && dr > 0) return (EPERM); return (0); } static void g_gate_queue_io(struct bio *bp) { struct g_gate_softc *sc; sc = bp->bio_to->geom->softc; if (sc == NULL || (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) { g_io_deliver(bp, ENXIO); return; } mtx_lock(&sc->sc_queue_mtx); if (sc->sc_queue_size > 0 && sc->sc_queue_count > sc->sc_queue_size) { mtx_unlock(&sc->sc_queue_mtx); G_GATE_LOGREQ(1, bp, "Queue full, request canceled."); g_io_deliver(bp, ENOMEM); return; } bp->bio_driver1 = (void *)sc->sc_seq; sc->sc_seq++; sc->sc_queue_count++; bioq_insert_tail(&sc->sc_inqueue, bp); wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); } static void g_gate_done(struct bio *cbp) { struct bio *pbp; pbp = cbp->bio_parent; if (cbp->bio_error == 0) { pbp->bio_completed = cbp->bio_completed; g_destroy_bio(cbp); pbp->bio_inbed++; g_io_deliver(pbp, 0); } else { /* If direct read failed, pass it through userland daemon. */ g_destroy_bio(cbp); pbp->bio_children--; g_gate_queue_io(pbp); } } static void g_gate_start(struct bio *pbp) { struct g_gate_softc *sc; sc = pbp->bio_to->geom->softc; if (sc == NULL || (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) { g_io_deliver(pbp, ENXIO); return; } G_GATE_LOGREQ(2, pbp, "Request received."); switch (pbp->bio_cmd) { case BIO_READ: if (sc->sc_readcons != NULL) { struct bio *cbp; cbp = g_clone_bio(pbp); if (cbp == NULL) { g_io_deliver(pbp, ENOMEM); return; } cbp->bio_done = g_gate_done; cbp->bio_offset = pbp->bio_offset + sc->sc_readoffset; cbp->bio_to = sc->sc_readcons->provider; g_io_request(cbp, sc->sc_readcons); return; } break; case BIO_DELETE: case BIO_WRITE: case BIO_FLUSH: /* XXX: Hack to allow read-only mounts. 
*/ if ((sc->sc_flags & G_GATE_FLAG_READONLY) != 0) { g_io_deliver(pbp, EPERM); return; } break; case BIO_GETATTR: default: G_GATE_LOGREQ(2, pbp, "Ignoring request."); g_io_deliver(pbp, EOPNOTSUPP); return; } g_gate_queue_io(pbp); } static struct g_gate_softc * g_gate_hold(int unit, const char *name) { struct g_gate_softc *sc = NULL; mtx_lock(&g_gate_units_lock); if (unit >= 0 && unit < g_gate_maxunits) sc = g_gate_units[unit]; else if (unit == G_GATE_NAME_GIVEN) { KASSERT(name != NULL, ("name is NULL")); for (unit = 0; unit < g_gate_maxunits; unit++) { if (g_gate_units[unit] == NULL) continue; if (strcmp(name, g_gate_units[unit]->sc_provider->name) != 0) { continue; } sc = g_gate_units[unit]; break; } } if (sc != NULL) sc->sc_ref++; mtx_unlock(&g_gate_units_lock); return (sc); } static void g_gate_release(struct g_gate_softc *sc) { g_topology_assert_not(); mtx_lock(&g_gate_units_lock); sc->sc_ref--; KASSERT(sc->sc_ref >= 0, ("Negative sc_ref for %s.", sc->sc_name)); if (sc->sc_ref == 0 && (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) wakeup(&sc->sc_ref); mtx_unlock(&g_gate_units_lock); } static int g_gate_getunit(int unit, int *errorp) { mtx_assert(&g_gate_units_lock, MA_OWNED); if (unit >= 0) { if (unit >= g_gate_maxunits) *errorp = EINVAL; else if (g_gate_units[unit] == NULL) return (unit); else *errorp = EEXIST; } else { for (unit = 0; unit < g_gate_maxunits; unit++) { if (g_gate_units[unit] == NULL) return (unit); } *errorp = ENFILE; } return (-1); } static void g_gate_guard(void *arg) { struct bio_queue_head queue; struct g_gate_softc *sc; struct bintime curtime; struct bio *bp, *bp2; sc = arg; binuptime(&curtime); g_gate_hold(sc->sc_unit, NULL); bioq_init(&queue); mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH_SAFE(bp, &sc->sc_inqueue.queue, bio_queue, bp2) { if (curtime.sec - bp->bio_t0.sec < 5) continue; bioq_remove(&sc->sc_inqueue, bp); sc->sc_queue_count--; bioq_insert_tail(&queue, bp); } TAILQ_FOREACH_SAFE(bp, &sc->sc_outqueue.queue, bio_queue, bp2) { if (curtime.sec - bp->bio_t0.sec < 5) continue; bioq_remove(&sc->sc_outqueue, bp); sc->sc_queue_count--; bioq_insert_tail(&queue, bp); } mtx_unlock(&sc->sc_queue_mtx); while ((bp = bioq_takefirst(&queue)) != NULL) { G_GATE_LOGREQ(1, bp, "Request timeout."); g_io_deliver(bp, EIO); } if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0) { callout_reset(&sc->sc_callout, sc->sc_timeout * hz, g_gate_guard, sc); } g_gate_release(sc); } static void g_gate_orphan(struct g_consumer *cp) { struct g_gate_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; KASSERT(cp == sc->sc_readcons, ("cp=%p sc_readcons=%p", cp, sc->sc_readcons)); sc->sc_readcons = NULL; G_GATE_DEBUG(1, "Destroying read consumer on provider %s orphan.", cp->provider->name); (void)g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); } static void g_gate_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_gate_softc *sc; sc = gp->softc; if (sc == NULL || pp != NULL || cp != NULL) return; sc = g_gate_hold(sc->sc_unit, NULL); if (sc == NULL) return; if ((sc->sc_flags & G_GATE_FLAG_READONLY) != 0) { sbuf_printf(sb, "%s%s\n", indent, "read-only"); } else if ((sc->sc_flags & G_GATE_FLAG_WRITEONLY) != 0) { sbuf_printf(sb, "%s%s\n", indent, "write-only"); } else { sbuf_printf(sb, "%s%s\n", indent, "read-write"); } if (sc->sc_readcons != NULL) { sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)sc->sc_readoffset); sbuf_printf(sb, "%s%s\n", indent, 
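[Editorial note] g_gate_hold() and g_gate_release() above implement the reference count that keeps a unit alive while it is in use: hold bumps sc_ref under g_gate_units_lock (returning NULL if the unit does not exist), and release drops it and wakes a destroy that is sleeping until the count reaches zero. A minimal sketch of the pattern the ioctl handlers follow; the function name is hypothetical and both helpers are static to this file:

static int
example_with_unit_held(int unit)
{
	struct g_gate_softc *sc;

	sc = g_gate_hold(unit, NULL);	/* take a reference, or fail */
	if (sc == NULL)
		return (ENXIO);
	/* ... use sc; g_gate_destroy() waits until the reference is dropped ... */
	g_gate_release(sc);		/* drop it, possibly waking a pending destroy */
	return (0);
}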
sc->sc_readcons->provider->name); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_timeout); sbuf_printf(sb, "%s%s\n", indent, sc->sc_info); sbuf_printf(sb, "%s%u\n", indent, sc->sc_queue_count); sbuf_printf(sb, "%s%u\n", indent, sc->sc_queue_size); sbuf_printf(sb, "%s%u\n", indent, sc->sc_ref); sbuf_printf(sb, "%s%d\n", indent, sc->sc_unit); g_topology_unlock(); g_gate_release(sc); g_topology_lock(); } static int g_gate_create(struct g_gate_ctl_create *ggio) { struct g_gate_softc *sc; struct g_geom *gp; struct g_provider *pp, *ropp; struct g_consumer *cp; char name[NAME_MAX]; int error = 0, unit; if (ggio->gctl_mediasize <= 0) { G_GATE_DEBUG(1, "Invalid media size."); return (EINVAL); } if (ggio->gctl_sectorsize <= 0) { G_GATE_DEBUG(1, "Invalid sector size."); return (EINVAL); } if (!powerof2(ggio->gctl_sectorsize)) { G_GATE_DEBUG(1, "Invalid sector size."); return (EINVAL); } if ((ggio->gctl_mediasize % ggio->gctl_sectorsize) != 0) { G_GATE_DEBUG(1, "Invalid media size."); return (EINVAL); } if ((ggio->gctl_flags & G_GATE_FLAG_READONLY) != 0 && (ggio->gctl_flags & G_GATE_FLAG_WRITEONLY) != 0) { G_GATE_DEBUG(1, "Invalid flags."); return (EINVAL); } if (ggio->gctl_unit != G_GATE_UNIT_AUTO && ggio->gctl_unit != G_GATE_NAME_GIVEN && ggio->gctl_unit < 0) { G_GATE_DEBUG(1, "Invalid unit number."); return (EINVAL); } if (ggio->gctl_unit == G_GATE_NAME_GIVEN && ggio->gctl_name[0] == '\0') { G_GATE_DEBUG(1, "No device name."); return (EINVAL); } sc = malloc(sizeof(*sc), M_GATE, M_WAITOK | M_ZERO); sc->sc_flags = (ggio->gctl_flags & G_GATE_USERFLAGS); strlcpy(sc->sc_info, ggio->gctl_info, sizeof(sc->sc_info)); sc->sc_seq = 1; bioq_init(&sc->sc_inqueue); bioq_init(&sc->sc_outqueue); mtx_init(&sc->sc_queue_mtx, "gg:queue", NULL, MTX_DEF); sc->sc_queue_count = 0; sc->sc_queue_size = ggio->gctl_maxcount; if (sc->sc_queue_size > G_GATE_MAX_QUEUE_SIZE) sc->sc_queue_size = G_GATE_MAX_QUEUE_SIZE; sc->sc_timeout = ggio->gctl_timeout; callout_init(&sc->sc_callout, 1); mtx_lock(&g_gate_units_lock); sc->sc_unit = g_gate_getunit(ggio->gctl_unit, &error); if (sc->sc_unit < 0) goto fail1; if (ggio->gctl_unit == G_GATE_NAME_GIVEN) snprintf(name, sizeof(name), "%s", ggio->gctl_name); else { snprintf(name, sizeof(name), "%s%d", G_GATE_PROVIDER_NAME, sc->sc_unit); } /* Check for name collision. 
*/ for (unit = 0; unit < g_gate_maxunits; unit++) { if (g_gate_units[unit] == NULL) continue; if (strcmp(name, g_gate_units[unit]->sc_name) != 0) continue; error = EEXIST; goto fail1; } sc->sc_name = name; g_gate_units[sc->sc_unit] = sc; g_gate_nunits++; mtx_unlock(&g_gate_units_lock); g_topology_lock(); if (ggio->gctl_readprov[0] == '\0') { ropp = NULL; } else { ropp = g_provider_by_name(ggio->gctl_readprov); if (ropp == NULL) { G_GATE_DEBUG(1, "Provider %s doesn't exist.", ggio->gctl_readprov); error = EINVAL; goto fail2; } if ((ggio->gctl_readoffset % ggio->gctl_sectorsize) != 0) { G_GATE_DEBUG(1, "Invalid read offset."); error = EINVAL; goto fail2; } if (ggio->gctl_mediasize + ggio->gctl_readoffset > ropp->mediasize) { G_GATE_DEBUG(1, "Invalid read offset or media size."); error = EINVAL; goto fail2; } } gp = g_new_geomf(&g_gate_class, "%s", name); gp->start = g_gate_start; gp->access = g_gate_access; gp->orphan = g_gate_orphan; gp->dumpconf = g_gate_dumpconf; gp->softc = sc; if (ropp != NULL) { cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, ropp); if (error != 0) { G_GATE_DEBUG(1, "Unable to attach to %s.", ropp->name); goto fail3; } error = g_access(cp, 1, 0, 0); if (error != 0) { G_GATE_DEBUG(1, "Unable to access %s.", ropp->name); g_detach(cp); goto fail3; } sc->sc_readcons = cp; sc->sc_readoffset = ggio->gctl_readoffset; } ggio->gctl_unit = sc->sc_unit; pp = g_new_providerf(gp, "%s", name); pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; pp->mediasize = ggio->gctl_mediasize; pp->sectorsize = ggio->gctl_sectorsize; sc->sc_provider = pp; g_error_provider(pp, 0); g_topology_unlock(); mtx_lock(&g_gate_units_lock); sc->sc_name = sc->sc_provider->name; mtx_unlock(&g_gate_units_lock); G_GATE_DEBUG(1, "Device %s created.", gp->name); if (sc->sc_timeout > 0) { callout_reset(&sc->sc_callout, sc->sc_timeout * hz, g_gate_guard, sc); } return (0); fail3: g_destroy_consumer(cp); g_destroy_geom(gp); fail2: g_topology_unlock(); mtx_lock(&g_gate_units_lock); g_gate_units[sc->sc_unit] = NULL; KASSERT(g_gate_nunits > 0, ("negative g_gate_nunits?")); g_gate_nunits--; fail1: mtx_unlock(&g_gate_units_lock); mtx_destroy(&sc->sc_queue_mtx); free(sc, M_GATE); return (error); } static int g_gate_modify(struct g_gate_softc *sc, struct g_gate_ctl_modify *ggio) { struct g_provider *pp; struct g_consumer *cp; int error; if ((ggio->gctl_modify & GG_MODIFY_MEDIASIZE) != 0) { if (ggio->gctl_mediasize <= 0) { G_GATE_DEBUG(1, "Invalid media size."); return (EINVAL); } pp = sc->sc_provider; if ((ggio->gctl_mediasize % pp->sectorsize) != 0) { G_GATE_DEBUG(1, "Invalid media size."); return (EINVAL); } g_resize_provider(pp, ggio->gctl_mediasize); return (0); } if ((ggio->gctl_modify & GG_MODIFY_INFO) != 0) (void)strlcpy(sc->sc_info, ggio->gctl_info, sizeof(sc->sc_info)); cp = NULL; if ((ggio->gctl_modify & GG_MODIFY_READPROV) != 0) { g_topology_lock(); if (sc->sc_readcons != NULL) { cp = sc->sc_readcons; sc->sc_readcons = NULL; (void)g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); } if (ggio->gctl_readprov[0] != '\0') { pp = g_provider_by_name(ggio->gctl_readprov); if (pp == NULL) { g_topology_unlock(); G_GATE_DEBUG(1, "Provider %s doesn't exist.", ggio->gctl_readprov); return (EINVAL); } cp = g_new_consumer(sc->sc_provider->geom); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { G_GATE_DEBUG(1, "Unable to attach to %s.", pp->name); } else { error = g_access(cp, 1, 0, 0); if (error != 0) { 
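[Editorial note] g_gate_create() above is driven from userland through the G_GATE_CMD_CREATE ioctl on the control device (G_GATE_CTL_NAME, i.e. /dev/ggctl). A hypothetical userland sketch using the structures declared in g_gate.h later in this change; the sizes are arbitrary and error handling is abbreviated:

#include <sys/param.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>

#include <geom/gate/g_gate.h>

int
ggate_create_example(void)
{
	struct g_gate_ctl_create ggio;
	int fd;

	memset(&ggio, 0, sizeof(ggio));
	ggio.gctl_version = G_GATE_VERSION;
	ggio.gctl_mediasize = 1024 * 1024 * 1024;	/* must be a multiple of the sector size */
	ggio.gctl_sectorsize = 512;
	ggio.gctl_flags = 0;				/* read-write */
	ggio.gctl_maxcount = 256;			/* clamped to G_GATE_MAX_QUEUE_SIZE */
	ggio.gctl_timeout = 30;
	ggio.gctl_unit = G_GATE_UNIT_AUTO;
	strlcpy(ggio.gctl_info, "example", sizeof(ggio.gctl_info));

	fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR);
	if (fd == -1)
		return (-1);
	if (ioctl(fd, G_GATE_CMD_CREATE, &ggio) == -1)
		return (-1);
	/* On success the kernel fills in gctl_unit; the provider is ggate<unit>. */
	return (ggio.gctl_unit);
}

A serving daemon would then loop on G_GATE_CMD_START to fetch requests and G_GATE_CMD_DONE to complete them, which is the path handled by g_gate_ioctl() below.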
G_GATE_DEBUG(1, "Unable to access %s.", pp->name); g_detach(cp); } } if (error != 0) { g_destroy_consumer(cp); g_topology_unlock(); return (error); } } } else { cp = sc->sc_readcons; } if ((ggio->gctl_modify & GG_MODIFY_READOFFSET) != 0) { if (cp == NULL) { G_GATE_DEBUG(1, "No read provider."); return (EINVAL); } pp = sc->sc_provider; if ((ggio->gctl_readoffset % pp->sectorsize) != 0) { G_GATE_DEBUG(1, "Invalid read offset."); return (EINVAL); } if (pp->mediasize + ggio->gctl_readoffset > cp->provider->mediasize) { G_GATE_DEBUG(1, "Invalid read offset or media size."); return (EINVAL); } sc->sc_readoffset = ggio->gctl_readoffset; } if ((ggio->gctl_modify & GG_MODIFY_READPROV) != 0) { sc->sc_readcons = cp; g_topology_unlock(); } return (0); } #define G_GATE_CHECK_VERSION(ggio) do { \ if ((ggio)->gctl_version != G_GATE_VERSION) { \ printf("Version mismatch %d != %d.\n", \ ggio->gctl_version, G_GATE_VERSION); \ return (EINVAL); \ } \ } while (0) static int g_gate_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { struct g_gate_softc *sc; struct bio *bp; int error = 0; G_GATE_DEBUG(4, "ioctl(%s, %lx, %p, %x, %p)", devtoname(dev), cmd, addr, flags, td); switch (cmd) { case G_GATE_CMD_CREATE: { struct g_gate_ctl_create *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); error = g_gate_create(ggio); /* * Reset TDP_GEOM flag. * There are pending events for sure, because we just created * new provider and other classes want to taste it, but we * cannot answer on I/O requests until we're here. */ td->td_pflags &= ~TDP_GEOM; return (error); } case G_GATE_CMD_MODIFY: { struct g_gate_ctl_modify *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, NULL); if (sc == NULL) return (ENXIO); error = g_gate_modify(sc, ggio); g_gate_release(sc); return (error); } case G_GATE_CMD_DESTROY: { struct g_gate_ctl_destroy *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, ggio->gctl_name); if (sc == NULL) return (ENXIO); g_topology_lock(); mtx_lock(&g_gate_units_lock); error = g_gate_destroy(sc, ggio->gctl_force); g_topology_unlock(); if (error != 0) g_gate_release(sc); return (error); } case G_GATE_CMD_CANCEL: { struct g_gate_ctl_cancel *ggio = (void *)addr; struct bio *tbp, *lbp; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, ggio->gctl_name); if (sc == NULL) return (ENXIO); lbp = NULL; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH_SAFE(bp, &sc->sc_outqueue.queue, bio_queue, tbp) { if (ggio->gctl_seq == 0 || ggio->gctl_seq == (uintptr_t)bp->bio_driver1) { G_GATE_LOGREQ(1, bp, "Request canceled."); bioq_remove(&sc->sc_outqueue, bp); /* * Be sure to put requests back onto incoming * queue in the proper order. */ if (lbp == NULL) bioq_insert_head(&sc->sc_inqueue, bp); else { TAILQ_INSERT_AFTER(&sc->sc_inqueue.queue, lbp, bp, bio_queue); } lbp = bp; /* * If only one request was canceled, leave now. 
*/ if (ggio->gctl_seq != 0) break; } } if (ggio->gctl_unit == G_GATE_NAME_GIVEN) ggio->gctl_unit = sc->sc_unit; mtx_unlock(&sc->sc_queue_mtx); g_gate_release(sc); return (error); } case G_GATE_CMD_START: { struct g_gate_ctl_io *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, NULL); if (sc == NULL) return (ENXIO); error = 0; for (;;) { mtx_lock(&sc->sc_queue_mtx); bp = bioq_first(&sc->sc_inqueue); if (bp != NULL) break; if ((sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) { ggio->gctl_error = ECANCELED; mtx_unlock(&sc->sc_queue_mtx); goto start_end; } if (msleep(sc, &sc->sc_queue_mtx, PPAUSE | PDROP | PCATCH, "ggwait", 0) != 0) { ggio->gctl_error = ECANCELED; goto start_end; } } ggio->gctl_cmd = bp->bio_cmd; if (bp->bio_cmd == BIO_WRITE && bp->bio_length > ggio->gctl_length) { mtx_unlock(&sc->sc_queue_mtx); ggio->gctl_length = bp->bio_length; ggio->gctl_error = ENOMEM; goto start_end; } bioq_remove(&sc->sc_inqueue, bp); bioq_insert_tail(&sc->sc_outqueue, bp); mtx_unlock(&sc->sc_queue_mtx); ggio->gctl_seq = (uintptr_t)bp->bio_driver1; ggio->gctl_offset = bp->bio_offset; ggio->gctl_length = bp->bio_length; switch (bp->bio_cmd) { case BIO_READ: case BIO_DELETE: case BIO_FLUSH: break; case BIO_WRITE: error = copyout(bp->bio_data, ggio->gctl_data, bp->bio_length); if (error != 0) { mtx_lock(&sc->sc_queue_mtx); bioq_remove(&sc->sc_outqueue, bp); bioq_insert_head(&sc->sc_inqueue, bp); mtx_unlock(&sc->sc_queue_mtx); goto start_end; } break; } start_end: g_gate_release(sc); return (error); } case G_GATE_CMD_DONE: { struct g_gate_ctl_io *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, NULL); if (sc == NULL) return (ENOENT); error = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH(bp, &sc->sc_outqueue.queue, bio_queue) { if (ggio->gctl_seq == (uintptr_t)bp->bio_driver1) break; } if (bp != NULL) { bioq_remove(&sc->sc_outqueue, bp); sc->sc_queue_count--; } mtx_unlock(&sc->sc_queue_mtx); if (bp == NULL) { /* * Request was probably canceled. 
*/ goto done_end; } if (ggio->gctl_error == EAGAIN) { bp->bio_error = 0; G_GATE_LOGREQ(1, bp, "Request desisted."); mtx_lock(&sc->sc_queue_mtx); sc->sc_queue_count++; bioq_insert_head(&sc->sc_inqueue, bp); wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); } else { bp->bio_error = ggio->gctl_error; if (bp->bio_error == 0) { bp->bio_completed = bp->bio_length; switch (bp->bio_cmd) { case BIO_READ: error = copyin(ggio->gctl_data, bp->bio_data, bp->bio_length); if (error != 0) bp->bio_error = error; break; case BIO_DELETE: case BIO_WRITE: case BIO_FLUSH: break; } } G_GATE_LOGREQ(2, bp, "Request done."); g_io_deliver(bp, bp->bio_error); } done_end: g_gate_release(sc); return (error); } } return (ENOIOCTL); } static void g_gate_device(void) { status_dev = make_dev(&g_gate_cdevsw, 0x0, UID_ROOT, GID_WHEEL, 0600, G_GATE_CTL_NAME); } static int g_gate_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: mtx_init(&g_gate_units_lock, "gg_units_lock", NULL, MTX_DEF); g_gate_units = malloc(g_gate_maxunits * sizeof(g_gate_units[0]), M_GATE, M_WAITOK | M_ZERO); g_gate_nunits = 0; g_gate_device(); break; case MOD_UNLOAD: mtx_lock(&g_gate_units_lock); if (g_gate_nunits > 0) { mtx_unlock(&g_gate_units_lock); error = EBUSY; break; } mtx_unlock(&g_gate_units_lock); mtx_destroy(&g_gate_units_lock); if (status_dev != NULL) destroy_dev(status_dev); free(g_gate_units, M_GATE); break; default: return (EOPNOTSUPP); break; } return (error); } static moduledata_t g_gate_module = { G_GATE_MOD_NAME, g_gate_modevent, NULL }; DECLARE_MODULE(geom_gate, g_gate_module, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); DECLARE_GEOM_CLASS(g_gate_class, g_gate); MODULE_VERSION(geom_gate, 0); Index: head/sys/geom/gate/g_gate.h =================================================================== --- head/sys/geom/gate/g_gate.h (revision 350693) +++ head/sys/geom/gate/g_gate.h (revision 350694) @@ -1,182 +1,164 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2009 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _G_GATE_H_ #define _G_GATE_H_ #include #include #include #include #include #define G_GATE_CLASS_NAME "GATE" #define G_GATE_PROVIDER_NAME "ggate" #define G_GATE_MOD_NAME "ggate" #define G_GATE_CTL_NAME "ggctl" #define G_GATE_VERSION 3 /* * Maximum number of request that can be stored in * the queue when there are no workers. */ #define G_GATE_MAX_QUEUE_SIZE 4096 #define G_GATE_FLAG_READONLY 0x0001 #define G_GATE_FLAG_WRITEONLY 0x0002 #define G_GATE_FLAG_DESTROY 0x1000 #define G_GATE_USERFLAGS (G_GATE_FLAG_READONLY | G_GATE_FLAG_WRITEONLY) /* * Pick unit number automatically in /dev/ggate. */ #define G_GATE_UNIT_AUTO (-1) /* * Full provider name is given, so don't use ggate. */ #define G_GATE_NAME_GIVEN (-2) #define G_GATE_CMD_CREATE _IOWR('m', 0, struct g_gate_ctl_create) #define G_GATE_CMD_MODIFY _IOWR('m', 1, struct g_gate_ctl_modify) #define G_GATE_CMD_DESTROY _IOWR('m', 2, struct g_gate_ctl_destroy) #define G_GATE_CMD_CANCEL _IOWR('m', 3, struct g_gate_ctl_cancel) #define G_GATE_CMD_START _IOWR('m', 4, struct g_gate_ctl_io) #define G_GATE_CMD_DONE _IOWR('m', 5, struct g_gate_ctl_io) #define G_GATE_INFOSIZE 2048 #ifdef _KERNEL /* * 'P:' means 'Protected by'. */ struct g_gate_softc { char *sc_name; /* P: (read-only) */ int sc_unit; /* P: (read-only) */ int sc_ref; /* P: g_gate_list_mtx */ struct g_provider *sc_provider; /* P: (read-only) */ uint32_t sc_flags; /* P: sc_queue_mtx */ struct bio_queue_head sc_inqueue; /* P: sc_queue_mtx */ struct bio_queue_head sc_outqueue; /* P: sc_queue_mtx */ struct mtx sc_queue_mtx; uint32_t sc_queue_count; /* P: sc_queue_mtx */ uint32_t sc_queue_size; /* P: (read-only) */ u_int sc_timeout; /* P: (read-only) */ struct g_consumer *sc_readcons; /* P: XXX */ off_t sc_readoffset; /* P: XXX */ struct callout sc_callout; /* P: (modified only from callout thread) */ uintptr_t sc_seq; /* P: (modified only from g_down thread) */ LIST_ENTRY(g_gate_softc) sc_next; /* P: g_gate_list_mtx */ char sc_info[G_GATE_INFOSIZE]; /* P: (read-only) */ }; -#define G_GATE_DEBUG(lvl, ...) do { \ - if (g_gate_debug >= (lvl)) { \ - printf("GEOM_GATE"); \ - if (g_gate_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_GATE_LOGREQ(lvl, bp, ...) do { \ - if (g_gate_debug >= (lvl)) { \ - printf("GEOM_GATE"); \ - if (g_gate_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_GATE_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_GATE", g_gate_debug, (lvl), NULL, __VA_ARGS__) +#define G_GATE_LOGREQ(lvl, bp, ...) 
\ + _GEOM_DEBUG("GEOM_GATE", g_gate_debug, (lvl), (bp), __VA_ARGS__) #endif /* !_KERNEL */ struct g_gate_ctl_create { u_int gctl_version; off_t gctl_mediasize; u_int gctl_sectorsize; u_int gctl_flags; u_int gctl_maxcount; u_int gctl_timeout; char gctl_name[NAME_MAX]; char gctl_info[G_GATE_INFOSIZE]; char gctl_readprov[NAME_MAX]; off_t gctl_readoffset; int gctl_unit; /* in/out */ }; #define GG_MODIFY_MEDIASIZE 0x01 #define GG_MODIFY_INFO 0x02 #define GG_MODIFY_READPROV 0x04 #define GG_MODIFY_READOFFSET 0x08 struct g_gate_ctl_modify { u_int gctl_version; int gctl_unit; uint32_t gctl_modify; off_t gctl_mediasize; char gctl_info[G_GATE_INFOSIZE]; char gctl_readprov[NAME_MAX]; off_t gctl_readoffset; }; struct g_gate_ctl_destroy { u_int gctl_version; int gctl_unit; int gctl_force; char gctl_name[NAME_MAX]; }; struct g_gate_ctl_cancel { u_int gctl_version; int gctl_unit; uintptr_t gctl_seq; char gctl_name[NAME_MAX]; }; struct g_gate_ctl_io { u_int gctl_version; int gctl_unit; uintptr_t gctl_seq; u_int gctl_cmd; off_t gctl_offset; off_t gctl_length; void *gctl_data; int gctl_error; }; #endif /* !_G_GATE_H_ */ Index: head/sys/geom/geom.h =================================================================== --- head/sys/geom/geom.h (revision 350693) +++ head/sys/geom/geom.h (revision 350694) @@ -1,435 +1,436 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _GEOM_GEOM_H_ #define _GEOM_GEOM_H_ #include #include #include #include #include #include #include struct g_class; struct g_geom; struct g_consumer; struct g_provider; struct g_stat; struct thread; struct bio; struct sbuf; struct gctl_req; struct g_configargs; struct disk_zone_args; typedef int g_config_t (struct g_configargs *ca); typedef void g_ctl_req_t (struct gctl_req *, struct g_class *cp, char const *verb); typedef int g_ctl_create_geom_t (struct gctl_req *, struct g_class *cp, struct g_provider *pp); typedef int g_ctl_destroy_geom_t (struct gctl_req *, struct g_class *cp, struct g_geom *gp); typedef int g_ctl_config_geom_t (struct gctl_req *, struct g_geom *gp, const char *verb); typedef void g_init_t (struct g_class *mp); typedef void g_fini_t (struct g_class *mp); typedef struct g_geom * g_taste_t (struct g_class *, struct g_provider *, int flags); typedef int g_ioctl_t(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td); #define G_TF_NORMAL 0 #define G_TF_INSIST 1 #define G_TF_TRANSPARENT 2 typedef int g_access_t (struct g_provider *, int, int, int); /* XXX: not sure about the thread arg */ typedef void g_orphan_t (struct g_consumer *); typedef void g_start_t (struct bio *); typedef void g_spoiled_t (struct g_consumer *); typedef void g_attrchanged_t (struct g_consumer *, const char *attr); typedef void g_provgone_t (struct g_provider *); typedef void g_dumpconf_t (struct sbuf *, const char *indent, struct g_geom *, struct g_consumer *, struct g_provider *); typedef void g_resize_t(struct g_consumer *cp); /* * The g_class structure describes a transformation class. In other words * all BSD disklabel handlers share one g_class, all MBR handlers share * one common g_class and so on. * Certain operations are instantiated on the class, most notably the * taste and config_geom functions. */ struct g_class { const char *name; u_int version; u_int spare0; g_taste_t *taste; g_config_t *config; g_ctl_req_t *ctlreq; g_init_t *init; g_fini_t *fini; g_ctl_destroy_geom_t *destroy_geom; /* * Default values for geom methods */ g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare1; void *spare2; /* * The remaining elements are private */ LIST_ENTRY(g_class) class; LIST_HEAD(,g_geom) geom; }; /* * The g_geom_alias is a list node for aliases for the geom name * for device node creation. */ struct g_geom_alias { LIST_ENTRY(g_geom_alias) ga_next; const char *ga_alias; }; #define G_VERSION_00 0x19950323 #define G_VERSION_01 0x20041207 /* add fflag to g_ioctl_t */ #define G_VERSION G_VERSION_01 /* * The g_geom is an instance of a g_class. */ struct g_geom { char *name; struct g_class *class; LIST_ENTRY(g_geom) geom; LIST_HEAD(,g_consumer) consumer; LIST_HEAD(,g_provider) provider; TAILQ_ENTRY(g_geom) geoms; /* XXX: better name */ int rank; g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare0; void *spare1; void *softc; unsigned flags; #define G_GEOM_WITHER 0x01 #define G_GEOM_VOLATILE_BIO 0x02 #define G_GEOM_IN_ACCESS 0x04 #define G_GEOM_ACCESS_WAIT 0x08 LIST_HEAD(,g_geom_alias) aliases; }; /* * The g_bioq is a queue of struct bio's. * XXX: possibly collection point for statistics. 
* XXX: should (possibly) be collapsed with sys/bio.h::bio_queue_head. */ struct g_bioq { TAILQ_HEAD(, bio) bio_queue; struct mtx bio_queue_lock; int bio_queue_length; }; /* * A g_consumer is an attachment point for a g_provider. One g_consumer * can only be attached to one g_provider, but multiple g_consumers * can be attached to one g_provider. */ struct g_consumer { struct g_geom *geom; LIST_ENTRY(g_consumer) consumer; struct g_provider *provider; LIST_ENTRY(g_consumer) consumers; /* XXX: better name */ int acr, acw, ace; int flags; #define G_CF_SPOILED 0x1 #define G_CF_ORPHAN 0x4 #define G_CF_DIRECT_SEND 0x10 #define G_CF_DIRECT_RECEIVE 0x20 struct devstat *stat; u_int nstart, nend; /* Two fields for the implementing class to use */ void *private; u_int index; }; /* * A g_provider is a "logical disk". */ struct g_provider { char *name; LIST_ENTRY(g_provider) provider; struct g_geom *geom; LIST_HEAD(,g_consumer) consumers; int acr, acw, ace; int error; TAILQ_ENTRY(g_provider) orphan; off_t mediasize; u_int sectorsize; off_t stripesize; off_t stripeoffset; struct devstat *stat; u_int nstart, nend; u_int flags; #define G_PF_WITHER 0x2 #define G_PF_ORPHAN 0x4 #define G_PF_ACCEPT_UNMAPPED 0x8 #define G_PF_DIRECT_SEND 0x10 #define G_PF_DIRECT_RECEIVE 0x20 /* Two fields for the implementing class to use */ void *private; u_int index; }; /* * Descriptor of a classifier. We can register a function and * an argument, which is called by g_io_request() on bio's * that are not previously classified. */ struct g_classifier_hook { TAILQ_ENTRY(g_classifier_hook) link; int (*func)(void *arg, struct bio *bp); void *arg; }; /* BIO_GETATTR("GEOM::setstate") argument values. */ #define G_STATE_FAILED 0 #define G_STATE_REBUILD 1 #define G_STATE_RESYNC 2 #define G_STATE_ACTIVE 3 /* geom_dev.c */ struct cdev; void g_dev_print(void); void g_dev_physpath_changed(void); struct g_provider *g_dev_getprovider(struct cdev *dev); /* geom_dump.c */ void g_trace(int level, const char *, ...); # define G_T_TOPOLOGY 1 # define G_T_BIO 2 # define G_T_ACCESS 4 /* geom_event.c */ typedef void g_event_t(void *, int flag); #define EV_CANCEL 1 int g_post_event(g_event_t *func, void *arg, int flag, ...); int g_waitfor_event(g_event_t *func, void *arg, int flag, ...); void g_cancel_event(void *ref); int g_attr_changed(struct g_provider *pp, const char *attr, int flag); int g_media_changed(struct g_provider *pp, int flag); int g_media_gone(struct g_provider *pp, int flag); void g_orphan_provider(struct g_provider *pp, int error); void g_waitidlelock(void); /* geom_subr.c */ int g_access(struct g_consumer *cp, int nread, int nwrite, int nexcl); int g_attach(struct g_consumer *cp, struct g_provider *pp); int g_compare_names(const char *namea, const char *nameb); void g_destroy_consumer(struct g_consumer *cp); void g_destroy_geom(struct g_geom *pp); void g_destroy_provider(struct g_provider *pp); void g_detach(struct g_consumer *cp); void g_error_provider(struct g_provider *pp, int error); struct g_provider *g_provider_by_name(char const *arg); void g_geom_add_alias(struct g_geom *gp, const char *alias); int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len); #define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof *(v)) int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len); int g_handleattr_int(struct bio *bp, const char *attribute, int val); int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val); int g_handleattr_uint16_t(struct bio *bp, const char *attribute, 
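[Editorial note] The g_handleattr_*() helpers declared here are the usual way for a class's start() method to answer BIO_GETATTR requests: they compare the attribute name, copy the value into the bio, deliver it, and return non-zero when they did so. A hedged sketch; the class and attribute names are made up:

static void
example_start(struct bio *bp)
{

	switch (bp->bio_cmd) {
	case BIO_GETATTR:
		if (g_handleattr_int(bp, "EXAMPLE::candelete", 1))
			return;		/* attribute matched; bio already delivered */
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
}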
uint16_t val); int g_handleattr_str(struct bio *bp, const char *attribute, const char *str); struct g_consumer * g_new_consumer(struct g_geom *gp); struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) __printflike(2, 3); struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) __printflike(2, 3); void g_resize_provider(struct g_provider *pp, off_t size); int g_retaste(struct g_class *mp); void g_spoil(struct g_provider *pp, struct g_consumer *cp); int g_std_access(struct g_provider *pp, int dr, int dw, int de); void g_std_done(struct bio *bp); void g_std_spoiled(struct g_consumer *cp); void g_wither_geom(struct g_geom *gp, int error); void g_wither_geom_close(struct g_geom *gp, int error); void g_wither_provider(struct g_provider *pp, int error); #if defined(DIAGNOSTIC) || defined(DDB) int g_valid_obj(void const *ptr); #endif #ifdef DIAGNOSTIC #define G_VALID_CLASS(foo) \ KASSERT(g_valid_obj(foo) == 1, ("%p is not a g_class", foo)) #define G_VALID_GEOM(foo) \ KASSERT(g_valid_obj(foo) == 2, ("%p is not a g_geom", foo)) #define G_VALID_CONSUMER(foo) \ KASSERT(g_valid_obj(foo) == 3, ("%p is not a g_consumer", foo)) #define G_VALID_PROVIDER(foo) \ KASSERT(g_valid_obj(foo) == 4, ("%p is not a g_provider", foo)) #else #define G_VALID_CLASS(foo) do { } while (0) #define G_VALID_GEOM(foo) do { } while (0) #define G_VALID_CONSUMER(foo) do { } while (0) #define G_VALID_PROVIDER(foo) do { } while (0) #endif int g_modevent(module_t, int, void *); /* geom_io.c */ struct bio * g_clone_bio(struct bio *); struct bio * g_duplicate_bio(struct bio *); void g_destroy_bio(struct bio *); void g_io_deliver(struct bio *bp, int error); int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr); int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp); int g_io_flush(struct g_consumer *cp); int g_register_classifier(struct g_classifier_hook *hook); void g_unregister_classifier(struct g_classifier_hook *hook); void g_io_request(struct bio *bp, struct g_consumer *cp); struct bio *g_new_bio(void); struct bio *g_alloc_bio(void); void g_reset_bio(struct bio *); void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error); int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length); int g_delete_data(struct g_consumer *cp, off_t offset, off_t length); -void g_print_bio(struct bio *bp); +void g_format_bio(struct sbuf *, const struct bio *bp); +void g_print_bio(const char *prefix, const struct bio *bp, const char *fmtsuffix, ...) 
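[Editorial note] g_read_data() and g_write_data(), declared above and implemented further down in this diff, are the synchronous helpers GEOM classes use for metadata I/O. g_read_data() returns a buffer allocated with g_malloc() that the caller must g_free(), and reports the error through its last argument; since both wait in biowait(), callers commonly drop the topology lock around the call. A hedged sketch, with a hypothetical wrapper name:

static int
example_read_sector(struct g_consumer *cp, off_t blkno, u_char **bufp)
{
	u_char *buf;
	int error;

	buf = g_read_data(cp, blkno * cp->provider->sectorsize,
	    cp->provider->sectorsize, &error);
	if (buf == NULL)
		return (error);		/* filled in by g_read_data() */
	*bufp = buf;			/* caller frees with g_free() */
	return (0);
}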
__printflike(3, 4); int g_use_g_read_data(void *, off_t, void **, int); int g_use_g_write_data(void *, off_t, void *, int); /* geom_kern.c / geom_kernsim.c */ #ifdef _KERNEL extern struct sx topology_lock; struct g_kerneldump { off_t offset; off_t length; struct dumperinfo di; }; MALLOC_DECLARE(M_GEOM); static __inline void * g_malloc(int size, int flags) { void *p; p = malloc(size, M_GEOM, flags); return (p); } static __inline void g_free(void *ptr) { #ifdef DIAGNOSTIC if (sx_xlocked(&topology_lock)) { KASSERT(g_valid_obj(ptr) == 0, ("g_free(%p) of live object, type %d", ptr, g_valid_obj(ptr))); } #endif free(ptr, M_GEOM); } #define g_topology_lock() \ do { \ sx_xlock(&topology_lock); \ } while (0) #define g_topology_try_lock() sx_try_xlock(&topology_lock) #define g_topology_unlock() \ do { \ sx_xunlock(&topology_lock); \ } while (0) #define g_topology_assert() \ do { \ sx_assert(&topology_lock, SX_XLOCKED); \ } while (0) #define g_topology_assert_not() \ do { \ sx_assert(&topology_lock, SX_UNLOCKED); \ } while (0) #define g_topology_sleep(chan, timo) \ sx_sleep(chan, &topology_lock, 0, "gtopol", timo) #define DECLARE_GEOM_CLASS(class, name) \ static moduledata_t name##_mod = { \ #name, g_modevent, &class \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); int g_is_geom_thread(struct thread *td); #endif /* _KERNEL */ /* geom_ctl.c */ int gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len); void gctl_set_param_err(struct gctl_req *req, const char *param, void const *ptr, int len); void *gctl_get_param(struct gctl_req *req, const char *param, int *len); char const *gctl_get_asciiparam(struct gctl_req *req, const char *param); void *gctl_get_paraml(struct gctl_req *req, const char *param, int len); int gctl_error(struct gctl_req *req, const char *fmt, ...) __printflike(2, 3); struct g_class *gctl_get_class(struct gctl_req *req, char const *arg); struct g_geom *gctl_get_geom(struct gctl_req *req, struct g_class *mpr, char const *arg); struct g_provider *gctl_get_provider(struct gctl_req *req, char const *arg); #endif /* _GEOM_GEOM_H_ */ Index: head/sys/geom/geom_dbg.h =================================================================== --- head/sys/geom/geom_dbg.h (nonexistent) +++ head/sys/geom/geom_dbg.h (revision 350694) @@ -0,0 +1,49 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Conrad Meyer + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#pragma once + +#ifdef _KERNEL + +#define _GEOM_DEBUG(classname, ctrlvar, loglvl, biop, formatstr, ...) \ +do { \ + const int __control = (ctrlvar); \ + const int __level = (loglvl); \ + \ + if (__control < __level) \ + break; \ + \ + g_dbg_printf((classname), (__control > 0) ? __level : -1, \ + (biop), ": " formatstr, ## __VA_ARGS__); \ +} while (0) + +void g_dbg_printf(const char *classname, int lvl, struct bio *bp, + const char *format, ...) __printflike(4, 5); + +#endif /* _KERNEL */ Property changes on: head/sys/geom/geom_dbg.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/geom/geom_io.c =================================================================== --- head/sys/geom/geom_io.c (revision 350693) +++ head/sys/geom/geom_io.c (revision 350694) @@ -1,1095 +1,1127 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include static int g_io_transient_map_bio(struct bio *bp); static struct g_bioq g_bio_run_down; static struct g_bioq g_bio_run_up; /* * Pace is a hint that we've had some trouble recently allocating * bios, so we should back off trying to send I/O down the stack * a bit to let the problem resolve. When pacing, we also turn * off direct dispatch to also reduce memory pressure from I/Os * there, at the expxense of some added latency while the memory * pressures exist. See g_io_schedule_down() for more details * and limitations. */ static volatile u_int pace; static uma_zone_t biozone; /* * The head of the list of classifiers used in g_io_request. * Use g_register_classifier() and g_unregister_classifier() * to add/remove entries to the list. * Classifiers are invoked in registration order. */ static TAILQ_HEAD(g_classifier_tailq, g_classifier_hook) g_classifier_tailq = TAILQ_HEAD_INITIALIZER(g_classifier_tailq); #include static void g_bioq_lock(struct g_bioq *bq) { mtx_lock(&bq->bio_queue_lock); } static void g_bioq_unlock(struct g_bioq *bq) { mtx_unlock(&bq->bio_queue_lock); } #if 0 static void g_bioq_destroy(struct g_bioq *bq) { mtx_destroy(&bq->bio_queue_lock); } #endif static void g_bioq_init(struct g_bioq *bq) { TAILQ_INIT(&bq->bio_queue); mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); } static struct bio * g_bioq_first(struct g_bioq *bq) { struct bio *bp; bp = TAILQ_FIRST(&bq->bio_queue); if (bp != NULL) { KASSERT((bp->bio_flags & BIO_ONQUEUE), ("Bio not on queue bp=%p target %p", bp, bq)); bp->bio_flags &= ~BIO_ONQUEUE; TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); bq->bio_queue_length--; } return (bp); } struct bio * g_new_bio(void) { struct bio *bp; bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO); #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_new_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return (bp); } struct bio * g_alloc_bio(void) { struct bio *bp; bp = uma_zalloc(biozone, M_WAITOK | M_ZERO); #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return (bp); } void g_destroy_bio(struct bio *bp) { #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif uma_zfree(biozone, bp); } struct bio * g_clone_bio(struct bio *bp) { struct bio *bp2; bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO); if (bp2 != NULL) { bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; /* * BIO_ORDERED flag may be used by disk drivers to enforce * ordering restrictions, so this flag needs to be cloned. 
* BIO_UNMAPPED and BIO_VLIST should be inherited, to properly * indicate which way the buffer is passed. * Other bio flags are not suitable for cloning. */ bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST); bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_ma = bp->bio_ma; bp2->bio_ma_n = bp->bio_ma_n; bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; if (bp->bio_cmd == BIO_ZONE) bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone)); /* Inherit classification info from the parent */ bp2->bio_classifier1 = bp->bio_classifier1; bp2->bio_classifier2 = bp->bio_classifier2; #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) bp2->bio_track_bp = bp->bio_track_bp; #endif bp->bio_children++; } #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return(bp2); } struct bio * g_duplicate_bio(struct bio *bp) { struct bio *bp2; bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO); bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST); bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_ma = bp->bio_ma; bp2->bio_ma_n = bp->bio_ma_n; bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; bp->bio_children++; #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return(bp2); } void g_reset_bio(struct bio *bp) { bzero(bp, sizeof(*bp)); } void g_io_init() { g_bioq_init(&g_bio_run_down); g_bioq_init(&g_bio_run_up); biozone = uma_zcreate("g_bio", sizeof (struct bio), NULL, NULL, NULL, NULL, 0, 0); } int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_getattr(%s)", attr); bp = g_alloc_bio(); bp->bio_cmd = BIO_GETATTR; bp->bio_done = NULL; bp->bio_attribute = attr; bp->bio_length = *len; bp->bio_data = ptr; g_io_request(bp, cp); error = biowait(bp, "ggetattr"); *len = bp->bio_completed; g_destroy_bio(bp); return (error); } int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_zone(%d)", zone_args->zone_cmd); bp = g_alloc_bio(); bp->bio_cmd = BIO_ZONE; bp->bio_done = NULL; /* * XXX KDM need to handle report zone data. 
*/ bcopy(zone_args, &bp->bio_zone, sizeof(*zone_args)); if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES) bp->bio_length = zone_args->zone_params.report.entries_allocated * sizeof(struct disk_zone_rep_entry); else bp->bio_length = 0; g_io_request(bp, cp); error = biowait(bp, "gzone"); bcopy(&bp->bio_zone, zone_args, sizeof(*zone_args)); g_destroy_bio(bp); return (error); } int g_io_flush(struct g_consumer *cp) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name); bp = g_alloc_bio(); bp->bio_cmd = BIO_FLUSH; bp->bio_flags |= BIO_ORDERED; bp->bio_done = NULL; bp->bio_attribute = NULL; bp->bio_offset = cp->provider->mediasize; bp->bio_length = 0; bp->bio_data = NULL; g_io_request(bp, cp); error = biowait(bp, "gflush"); g_destroy_bio(bp); return (error); } static int g_io_check(struct bio *bp) { struct g_consumer *cp; struct g_provider *pp; off_t excess; int error; biotrack(bp, __func__); cp = bp->bio_from; pp = bp->bio_to; /* Fail if access counters dont allow the operation */ switch(bp->bio_cmd) { case BIO_READ: case BIO_GETATTR: if (cp->acr == 0) return (EPERM); break; case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: if (cp->acw == 0) return (EPERM); break; case BIO_ZONE: if ((bp->bio_zone.zone_cmd == DISK_ZONE_REPORT_ZONES) || (bp->bio_zone.zone_cmd == DISK_ZONE_GET_PARAMS)) { if (cp->acr == 0) return (EPERM); } else if (cp->acw == 0) return (EPERM); break; default: return (EPERM); } /* if provider is marked for error, don't disturb. */ if (pp->error) return (pp->error); if (cp->flags & G_CF_ORPHAN) return (ENXIO); switch(bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: /* Zero sectorsize or mediasize is probably a lack of media. */ if (pp->sectorsize == 0 || pp->mediasize == 0) return (ENXIO); /* Reject I/O not on sector boundary */ if (bp->bio_offset % pp->sectorsize) return (EINVAL); /* Reject I/O not integral sector long */ if (bp->bio_length % pp->sectorsize) return (EINVAL); /* Reject requests before or past the end of media. */ if (bp->bio_offset < 0) return (EIO); if (bp->bio_offset > pp->mediasize) return (EIO); /* Truncate requests to the end of providers media. */ excess = bp->bio_offset + bp->bio_length; if (excess > bp->bio_to->mediasize) { KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE == bp->bio_ma_n, ("excess bio %p too short", bp)); excess -= bp->bio_to->mediasize; bp->bio_length -= excess; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; } if (excess > 0) CTR3(KTR_GEOM, "g_down truncated bio " "%p provider %s by %d", bp, bp->bio_to->name, excess); } /* Deliver zero length transfers right here. */ if (bp->bio_length == 0) { CTR2(KTR_GEOM, "g_down terminated 0-length " "bp %p provider %s", bp, bp->bio_to->name); return (0); } if ((bp->bio_flags & BIO_UNMAPPED) != 0 && (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { if ((error = g_io_transient_map_bio(bp)) >= 0) return (error); } break; default: break; } return (EJUSTRETURN); } /* * bio classification support. * * g_register_classifier() and g_unregister_classifier() * are used to add/remove a classifier from the list. * The list is protected using the g_bio_run_down lock, * because the classifiers are called in this path. * * g_io_request() passes bio's that are not already classified * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers(). 
* Classifiers can store their result in the two fields * bio_classifier1 and bio_classifier2. * A classifier that updates one of the fields should * return a non-zero value. * If no classifier updates the field, g_run_classifiers() sets * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls. */ int g_register_classifier(struct g_classifier_hook *hook) { g_bioq_lock(&g_bio_run_down); TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link); g_bioq_unlock(&g_bio_run_down); return (0); } void g_unregister_classifier(struct g_classifier_hook *hook) { struct g_classifier_hook *entry; g_bioq_lock(&g_bio_run_down); TAILQ_FOREACH(entry, &g_classifier_tailq, link) { if (entry == hook) { TAILQ_REMOVE(&g_classifier_tailq, hook, link); break; } } g_bioq_unlock(&g_bio_run_down); } static void g_run_classifiers(struct bio *bp) { struct g_classifier_hook *hook; int classified = 0; biotrack(bp, __func__); TAILQ_FOREACH(hook, &g_classifier_tailq, link) classified |= hook->func(hook->arg, bp); if (!classified) bp->bio_classifier1 = BIO_NOTCLASSIFIED; } void g_io_request(struct bio *bp, struct g_consumer *cp) { struct g_provider *pp; struct mtx *mtxp; int direct, error, first; uint8_t cmd; biotrack(bp, __func__); KASSERT(cp != NULL, ("NULL cp in g_io_request")); KASSERT(bp != NULL, ("NULL bp in g_io_request")); pp = cp->provider; KASSERT(pp != NULL, ("consumer not attached in g_io_request")); #ifdef DIAGNOSTIC KASSERT(bp->bio_driver1 == NULL, ("bio_driver1 used by the consumer (geom %s)", cp->geom->name)); KASSERT(bp->bio_driver2 == NULL, ("bio_driver2 used by the consumer (geom %s)", cp->geom->name)); KASSERT(bp->bio_pflags == 0, ("bio_pflags used by the consumer (geom %s)", cp->geom->name)); /* * Remember consumer's private fields, so we can detect if they were * modified by the provider. */ bp->_bio_caller1 = bp->bio_caller1; bp->_bio_caller2 = bp->bio_caller2; bp->_bio_cflags = bp->bio_cflags; #endif cmd = bp->bio_cmd; if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_GETATTR) { KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd)); } if (cmd == BIO_DELETE || cmd == BIO_FLUSH) { KASSERT(bp->bio_data == NULL, ("non-NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd)); } if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_DELETE) { KASSERT(bp->bio_offset % cp->provider->sectorsize == 0, ("wrong offset %jd for sectorsize %u", bp->bio_offset, cp->provider->sectorsize)); KASSERT(bp->bio_length % cp->provider->sectorsize == 0, ("wrong length %jd for sectorsize %u", bp->bio_length, cp->provider->sectorsize)); } g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); bp->bio_from = cp; bp->bio_to = pp; bp->bio_error = 0; bp->bio_completed = 0; KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); if ((g_collectstats & G_STATS_CONSUMERS) != 0 || ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&bp->bio_t0); else getbinuptime(&bp->bio_t0); #ifdef GET_STACK_USAGE direct = (cp->flags & G_CF_DIRECT_SEND) != 0 && (pp->flags & G_PF_DIRECT_RECEIVE) != 0 && !g_is_geom_thread(curthread) && ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 || (bp->bio_flags & BIO_UNMAPPED) == 0 || THREAD_CAN_SLEEP()) && pace == 0; if (direct) { /* Block direct execution if less then half of stack left. 
*/ size_t st, su; GET_STACK_USAGE(st, su); if (su * 2 > st) direct = 0; } #else direct = 0; #endif if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) { g_bioq_lock(&g_bio_run_down); g_run_classifiers(bp); g_bioq_unlock(&g_bio_run_down); } /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. */ mtxp = mtx_pool_find(mtxpool_sleep, pp); mtx_lock(mtxp); if (g_collectstats & G_STATS_PROVIDERS) devstat_start_transaction(pp->stat, &bp->bio_t0); if (g_collectstats & G_STATS_CONSUMERS) devstat_start_transaction(cp->stat, &bp->bio_t0); pp->nstart++; cp->nstart++; mtx_unlock(mtxp); if (direct) { error = g_io_check(bp); if (error >= 0) { CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p " "provider %s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); return; } bp->bio_to->geom->start(bp); } else { g_bioq_lock(&g_bio_run_down); first = TAILQ_EMPTY(&g_bio_run_down.bio_queue); TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); bp->bio_flags |= BIO_ONQUEUE; g_bio_run_down.bio_queue_length++; g_bioq_unlock(&g_bio_run_down); /* Pass it on down. */ if (first) wakeup(&g_wait_down); } } void g_io_deliver(struct bio *bp, int error) { struct bintime now; struct g_consumer *cp; struct g_provider *pp; struct mtx *mtxp; int direct, first; biotrack(bp, __func__); KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); pp = bp->bio_to; KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); cp = bp->bio_from; if (cp == NULL) { bp->bio_error = error; bp->bio_done(bp); return; } KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); #ifdef DIAGNOSTIC /* * Some classes - GJournal in particular - can modify bio's * private fields while the bio is in transit; G_GEOM_VOLATILE_BIO * flag means it's an expected behaviour for that particular geom. */ if ((cp->geom->flags & G_GEOM_VOLATILE_BIO) == 0) { KASSERT(bp->bio_caller1 == bp->_bio_caller1, ("bio_caller1 used by the provider %s", pp->name)); KASSERT(bp->bio_caller2 == bp->_bio_caller2, ("bio_caller2 used by the provider %s", pp->name)); KASSERT(bp->bio_cflags == bp->_bio_cflags, ("bio_cflags used by the provider %s", pp->name)); } #endif KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0")); KASSERT(bp->bio_completed <= bp->bio_length, ("bio_completed can't be greater than bio_length")); g_trace(G_T_BIO, "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); /* * XXX: next two doesn't belong here */ bp->bio_bcount = bp->bio_length; bp->bio_resid = bp->bio_bcount - bp->bio_completed; #ifdef GET_STACK_USAGE direct = (pp->flags & G_PF_DIRECT_SEND) && (cp->flags & G_CF_DIRECT_RECEIVE) && !g_is_geom_thread(curthread); if (direct) { /* Block direct execution if less then half of stack left. */ size_t st, su; GET_STACK_USAGE(st, su); if (su * 2 > st) direct = 0; } #else direct = 0; #endif /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. 
*/ if ((g_collectstats & G_STATS_CONSUMERS) != 0 || ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&now); mtxp = mtx_pool_find(mtxpool_sleep, cp); mtx_lock(mtxp); if (g_collectstats & G_STATS_PROVIDERS) devstat_end_transaction_bio_bt(pp->stat, bp, &now); if (g_collectstats & G_STATS_CONSUMERS) devstat_end_transaction_bio_bt(cp->stat, bp, &now); cp->nend++; pp->nend++; mtx_unlock(mtxp); if (error != ENOMEM) { bp->bio_error = error; if (direct) { biodone(bp); } else { g_bioq_lock(&g_bio_run_up); first = TAILQ_EMPTY(&g_bio_run_up.bio_queue); TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); bp->bio_flags |= BIO_ONQUEUE; g_bio_run_up.bio_queue_length++; g_bioq_unlock(&g_bio_run_up); if (first) wakeup(&g_wait_up); } return; } if (bootverbose) printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); bp->bio_children = 0; bp->bio_inbed = 0; bp->bio_driver1 = NULL; bp->bio_driver2 = NULL; bp->bio_pflags = 0; g_io_request(bp, cp); pace = 1; return; } SYSCTL_DECL(_kern_geom); static long transient_maps; SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD, &transient_maps, 0, "Total count of the transient mapping requests"); u_int transient_map_retries = 10; SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW, &transient_map_retries, 0, "Max count of retries used before giving up on creating transient map"); int transient_map_hard_failures; SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD, &transient_map_hard_failures, 0, "Failures to establish the transient mapping due to retry attempts " "exhausted"); int transient_map_soft_failures; SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD, &transient_map_soft_failures, 0, "Count of retried failures to establish the transient mapping"); int inflight_transient_maps; SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD, &inflight_transient_maps, 0, "Current count of the active transient maps"); static int g_io_transient_map_bio(struct bio *bp) { vm_offset_t addr; long size; u_int retried; KASSERT(unmapped_buf_allowed, ("unmapped disabled")); size = round_page(bp->bio_ma_offset + bp->bio_length); KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp)); addr = 0; retried = 0; atomic_add_long(&transient_maps, 1); retry: if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) { if (transient_map_retries != 0 && retried >= transient_map_retries) { CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s", bp, bp->bio_to->name); atomic_add_int(&transient_map_hard_failures, 1); return (EDEADLK/* XXXKIB */); } else { /* * Naive attempt to quisce the I/O to get more * in-flight requests completed and defragment * the transient_arena. 
*/ CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d", bp, bp->bio_to->name, retried); pause("g_d_tra", hz / 10); retried++; atomic_add_int(&transient_map_soft_failures, 1); goto retry; } } atomic_add_int(&inflight_transient_maps, 1); pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size)); bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; bp->bio_flags |= BIO_TRANSIENT_MAPPING; bp->bio_flags &= ~BIO_UNMAPPED; return (EJUSTRETURN); } void g_io_schedule_down(struct thread *tp __unused) { struct bio *bp; int error; for(;;) { g_bioq_lock(&g_bio_run_down); bp = g_bioq_first(&g_bio_run_down); if (bp == NULL) { CTR0(KTR_GEOM, "g_down going to sleep"); msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, PRIBIO | PDROP, "-", 0); continue; } CTR0(KTR_GEOM, "g_down has work to do"); g_bioq_unlock(&g_bio_run_down); biotrack(bp, __func__); if (pace != 0) { /* * There has been at least one memory allocation * failure since the last I/O completed. Pause 1ms to * give the system a chance to free up memory. We only * do this once because a large number of allocations * can fail in the direct dispatch case and there's no * relationship between the number of these failures and * the length of the outage. If there's still an outage, * we'll pause again and again until it's * resolved. Older versions paused longer and once per * allocation failure. This was OK for a single threaded * g_down, but with direct dispatch would lead to max of * 10 IOPs for minutes at a time when transient memory * issues prevented allocation for a batch of requests * from the upper layers. * * XXX This pacing is really lame. It needs to be solved * by other methods. This is OK only because the worst * case scenario is so rare. In the worst case scenario * all memory is tied up waiting for I/O to complete * which can never happen since we can't allocate bios * for that I/O. 
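The pacing described above reduces to a one-bit handshake: the completion path records that an allocation failed, and the dispatch loop pauses briefly once per outage instead of once per failure. A schematic sketch of that handshake with hypothetical names (not the actual g_down code):

	static volatile u_int pace_hint;	/* stand-in for the 'pace' variable above */

	/* Completion side: note that memory was too tight to finish a request. */
	static void
	note_alloc_failure(void)
	{
		pace_hint = 1;
	}

	/* Dispatch loop: pause for one tick at most once per outage, then move on. */
	static void
	maybe_pace(void)
	{
		if (pace_hint != 0) {
			pause("pace", 1);
			pace_hint = 0;
		}
	}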
*/ CTR0(KTR_GEOM, "g_down pacing self"); pause("g_down", min(hz/1000, 1)); pace = 0; } CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, bp->bio_to->name); error = g_io_check(bp); if (error >= 0) { CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider " "%s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); continue; } THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld " "len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); bp->bio_to->geom->start(bp); THREAD_SLEEPING_OK(); } } void g_io_schedule_up(struct thread *tp __unused) { struct bio *bp; for(;;) { g_bioq_lock(&g_bio_run_up); bp = g_bioq_first(&g_bio_run_up); if (bp == NULL) { CTR0(KTR_GEOM, "g_up going to sleep"); msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, PRIBIO | PDROP, "-", 0); continue; } g_bioq_unlock(&g_bio_run_up); THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off " "%jd len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); biodone(bp); THREAD_SLEEPING_OK(); } } void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) { struct bio *bp; void *ptr; int errorc; KASSERT(length > 0 && length >= cp->provider->sectorsize && length <= MAXPHYS, ("g_read_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_READ; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; ptr = g_malloc(length, M_WAITOK); bp->bio_data = ptr; g_io_request(bp, cp); errorc = biowait(bp, "gread"); if (error != NULL) *error = errorc; g_destroy_bio(bp); if (errorc) { g_free(ptr); ptr = NULL; } return (ptr); } /* * A read function for use by ffs_sbget when used by GEOM-layer routines. */ int g_use_g_read_data(void *devfd, off_t loc, void **bufp, int size) { struct g_consumer *cp; KASSERT(*bufp == NULL, ("g_use_g_read_data: non-NULL *bufp %p\n", *bufp)); cp = (struct g_consumer *)devfd; /* * Take care not to issue an invalid I/O request. The offset of * the superblock candidate must be multiples of the provider's * sector size, otherwise an FFS can't exist on the provider * anyway. */ if (loc % cp->provider->sectorsize != 0) return (ENOENT); *bufp = g_read_data(cp, loc, size, NULL); if (*bufp == NULL) return (ENOENT); return (0); } int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) { struct bio *bp; int error; KASSERT(length > 0 && length >= cp->provider->sectorsize && length <= MAXPHYS, ("g_write_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_WRITE; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; bp->bio_data = ptr; g_io_request(bp, cp); error = biowait(bp, "gwrite"); g_destroy_bio(bp); return (error); } /* * A write function for use by ffs_sbput when used by GEOM-layer routines. */ int g_use_g_write_data(void *devfd, off_t loc, void *buf, int size) { return (g_write_data((struct g_consumer *)devfd, loc, buf, size)); } int g_delete_data(struct g_consumer *cp, off_t offset, off_t length) { struct bio *bp; int error; KASSERT(length > 0 && length >= cp->provider->sectorsize, ("g_delete_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_DELETE; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; bp->bio_data = NULL; g_io_request(bp, cp); error = biowait(bp, "gdelete"); g_destroy_bio(bp); return (error); } void -g_print_bio(struct bio *bp) +g_print_bio(const char *prefix, const struct bio *bp, const char *fmtsuffix, + ...) 
{ +#ifndef PRINTF_BUFR_SIZE +#define PRINTF_BUFR_SIZE 64 +#endif + char bufr[PRINTF_BUFR_SIZE]; + struct sbuf sb, *sbp __unused; + va_list ap; + + sbp = sbuf_new(&sb, bufr, sizeof(bufr), SBUF_FIXEDLEN); + KASSERT(sbp != NULL, ("sbuf_new misused?")); + + sbuf_set_drain(&sb, sbuf_printf_drain, NULL); + + sbuf_cat(&sb, prefix); + g_format_bio(&sb, bp); + + va_start(ap, fmtsuffix); + sbuf_vprintf(&sb, fmtsuffix, ap); + va_end(ap); + + sbuf_nl_terminate(&sb); + + sbuf_finish(&sb); + sbuf_delete(&sb); +} + +void +g_format_bio(struct sbuf *sb, const struct bio *bp) +{ const char *pname, *cmd = NULL; if (bp->bio_to != NULL) pname = bp->bio_to->name; else pname = "[unknown]"; switch (bp->bio_cmd) { case BIO_GETATTR: cmd = "GETATTR"; - printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute); + sbuf_printf(sb, "%s[%s(attr=%s)]", pname, cmd, + bp->bio_attribute); return; case BIO_FLUSH: cmd = "FLUSH"; - printf("%s[%s]", pname, cmd); + sbuf_printf(sb, "%s[%s]", pname, cmd); return; case BIO_ZONE: { char *subcmd = NULL; cmd = "ZONE"; switch (bp->bio_zone.zone_cmd) { case DISK_ZONE_OPEN: subcmd = "OPEN"; break; case DISK_ZONE_CLOSE: subcmd = "CLOSE"; break; case DISK_ZONE_FINISH: subcmd = "FINISH"; break; case DISK_ZONE_RWP: subcmd = "RWP"; break; case DISK_ZONE_REPORT_ZONES: subcmd = "REPORT ZONES"; break; case DISK_ZONE_GET_PARAMS: subcmd = "GET PARAMS"; break; default: subcmd = "UNKNOWN"; break; } - printf("%s[%s,%s]", pname, cmd, subcmd); + sbuf_printf(sb, "%s[%s,%s]", pname, cmd, subcmd); return; } case BIO_READ: cmd = "READ"; break; case BIO_WRITE: cmd = "WRITE"; break; case BIO_DELETE: cmd = "DELETE"; break; default: cmd = "UNKNOWN"; - printf("%s[%s()]", pname, cmd); + sbuf_printf(sb, "%s[%s()]", pname, cmd); return; } - printf("%s[%s(offset=%jd, length=%jd)]", pname, cmd, + sbuf_printf(sb, "%s[%s(offset=%jd, length=%jd)]", pname, cmd, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); } Index: head/sys/geom/geom_subr.c =================================================================== --- head/sys/geom/geom_subr.c (revision 350693) +++ head/sys/geom/geom_subr.c (revision 350694) @@ -1,1613 +1,1652 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #ifdef DDB #include #endif #ifdef KDB #include #endif struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes); static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; struct g_hh00 { struct g_class *mp; struct g_provider *pp; off_t size; int error; int post; }; + +void +g_dbg_printf(const char *classname, int lvl, struct bio *bp, + const char *format, + ...) +{ +#ifndef PRINTF_BUFR_SIZE +#define PRINTF_BUFR_SIZE 64 +#endif + char bufr[PRINTF_BUFR_SIZE]; + struct sbuf sb, *sbp __unused; + va_list ap; + + sbp = sbuf_new(&sb, bufr, sizeof(bufr), SBUF_FIXEDLEN); + KASSERT(sbp != NULL, ("sbuf_new misused?")); + + sbuf_set_drain(&sb, sbuf_printf_drain, NULL); + + sbuf_cat(&sb, classname); + if (lvl >= 0) + sbuf_printf(&sb, "[%d]", lvl); + + va_start(ap, format); + sbuf_vprintf(&sb, format, ap); + va_end(ap); + + if (bp != NULL) { + sbuf_putc(&sb, ' '); + g_format_bio(&sb, bp); + } + + /* Terminate the debug line with a single '\n'. */ + sbuf_nl_terminate(&sb); + + /* Flush line to printf. */ + sbuf_finish(&sb); + sbuf_delete(&sb); +} /* * This event offers a new class a chance to taste all preexisting providers. */ static void g_load_class(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp2, *mp; struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (flag == EV_CANCEL) /* XXX: can't happen ? 
*/ return; if (g_shutdown) return; hh = arg; mp = hh->mp; hh->error = 0; if (hh->post) { g_free(hh); hh = NULL; } g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name); KASSERT(mp->name != NULL && *mp->name != '\0', ("GEOM class has no name")); LIST_FOREACH(mp2, &g_classes, class) { if (mp2 == mp) { printf("The GEOM class %s is already loaded.\n", mp2->name); if (hh != NULL) hh->error = EEXIST; return; } else if (strcmp(mp2->name, mp->name) == 0) { printf("A GEOM class %s is already loaded.\n", mp2->name); if (hh != NULL) hh->error = EEXIST; return; } } LIST_INIT(&mp->geom); LIST_INSERT_HEAD(&g_classes, mp, class); if (mp->init != NULL) mp->init(mp); if (mp->taste == NULL) return; LIST_FOREACH(mp2, &g_classes, class) { if (mp == mp2) continue; LIST_FOREACH(gp, &mp2->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { mp->taste(mp, pp, 0); g_topology_assert(); } } } } static int g_unload_class(struct g_class *mp) { struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; int error; g_topology_lock(); g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name); retry: G_VALID_CLASS(mp); LIST_FOREACH(gp, &mp->geom, geom) { /* We refuse to unload if anything is open */ LIST_FOREACH(pp, &gp->provider, provider) if (pp->acr || pp->acw || pp->ace) { g_topology_unlock(); return (EBUSY); } LIST_FOREACH(cp, &gp->consumer, consumer) if (cp->acr || cp->acw || cp->ace) { g_topology_unlock(); return (EBUSY); } /* If the geom is withering, wait for it to finish. */ if (gp->flags & G_GEOM_WITHER) { g_topology_sleep(mp, 1); goto retry; } } /* * We allow unloading if we have no geoms, or a class * method we can use to get rid of them. */ if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) { g_topology_unlock(); return (EOPNOTSUPP); } /* Bar new entries */ mp->taste = NULL; mp->config = NULL; LIST_FOREACH(gp, &mp->geom, geom) { error = mp->destroy_geom(NULL, mp, gp); if (error != 0) { g_topology_unlock(); return (error); } } /* Wait for withering to finish. */ for (;;) { gp = LIST_FIRST(&mp->geom); if (gp == NULL) break; KASSERT(gp->flags & G_GEOM_WITHER, ("Non-withering geom in class %s", mp->name)); g_topology_sleep(mp, 1); } G_VALID_CLASS(mp); if (mp->fini != NULL) mp->fini(mp); LIST_REMOVE(mp, class); g_topology_unlock(); return (0); } int g_modevent(module_t mod, int type, void *data) { struct g_hh00 *hh; int error; static int g_ignition; struct g_class *mp; mp = data; if (mp->version != G_VERSION) { printf("GEOM class %s has Wrong version %x\n", mp->name, mp->version); return (EINVAL); } if (!g_ignition) { g_ignition++; g_init(); } error = EOPNOTSUPP; switch (type) { case MOD_LOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name); hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->mp = mp; /* * Once the system is not cold, MOD_LOAD calls will be * from the userland and the g_event thread will be able * to acknowledge their completion. 
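For context, g_load_class() and g_modevent() above are what a class's module glue ends up invoking. A minimal, hypothetical class skeleton showing the pieces they rely on, namely the version field checked by g_modevent() and the taste method offered to every preexisting provider (an illustrative sketch, not code from this commit):

	#include <sys/param.h>
	#include <sys/kernel.h>
	#include <sys/module.h>
	#include <geom/geom.h>

	static g_taste_t g_example_taste;

	static struct g_geom *
	g_example_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
	{
		/*
		 * Inspect pp (typically by creating a consumer, attaching and
		 * reading metadata) and return a new geom, or NULL if the
		 * provider is not ours.
		 */
		return (NULL);
	}

	static struct g_class g_example_class = {
		.name = "EXAMPLE",
		.version = G_VERSION,		/* checked by g_modevent() */
		.taste = g_example_taste,
	};

	/* Expands to the module glue whose event handler calls g_modevent(). */
	DECLARE_GEOM_CLASS(g_example_class, g_example);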
*/ if (cold) { hh->post = 1; error = g_post_event(g_load_class, hh, M_WAITOK, NULL); } else { error = g_waitfor_event(g_load_class, hh, M_WAITOK, NULL); if (error == 0) error = hh->error; g_free(hh); } break; case MOD_UNLOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", mp->name); error = g_unload_class(mp); if (error == 0) { KASSERT(LIST_EMPTY(&mp->geom), ("Unloaded class (%s) still has geom", mp->name)); } break; } return (error); } static void g_retaste_event(void *arg, int flag) { struct g_class *mp, *mp2; struct g_geom *gp; struct g_hh00 *hh; struct g_provider *pp; struct g_consumer *cp; g_topology_assert(); if (flag == EV_CANCEL) /* XXX: can't happen ? */ return; if (g_shutdown || g_notaste) return; hh = arg; mp = hh->mp; hh->error = 0; if (hh->post) { g_free(hh); hh = NULL; } g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name); LIST_FOREACH(mp2, &g_classes, class) { LIST_FOREACH(gp, &mp2->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (pp->acr || pp->acw || pp->ace) continue; LIST_FOREACH(cp, &pp->consumers, consumers) { if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; } if (cp != NULL) { cp->flags |= G_CF_ORPHAN; g_wither_geom(cp->geom, ENXIO); } mp->taste(mp, pp, 0); g_topology_assert(); } } } } int g_retaste(struct g_class *mp) { struct g_hh00 *hh; int error; if (mp->taste == NULL) return (EINVAL); hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->mp = mp; if (cold) { hh->post = 1; error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL); } else { error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL); if (error == 0) error = hh->error; g_free(hh); } return (error); } struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) { struct g_geom *gp; va_list ap; struct sbuf *sb; g_topology_assert(); G_VALID_CLASS(mp); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO); gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO); gp->class = mp; gp->rank = 1; LIST_INIT(&gp->consumer); LIST_INIT(&gp->provider); LIST_INIT(&gp->aliases); LIST_INSERT_HEAD(&mp->geom, gp, geom); TAILQ_INSERT_HEAD(&geoms, gp, geoms); strcpy(gp->name, sbuf_data(sb)); sbuf_delete(sb); /* Fill in defaults from class */ gp->start = mp->start; gp->spoiled = mp->spoiled; gp->attrchanged = mp->attrchanged; gp->providergone = mp->providergone; gp->dumpconf = mp->dumpconf; gp->access = mp->access; gp->orphan = mp->orphan; gp->ioctl = mp->ioctl; gp->resize = mp->resize; return (gp); } void g_destroy_geom(struct g_geom *gp) { struct g_geom_alias *gap, *gaptmp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name); KASSERT(LIST_EMPTY(&gp->consumer), ("g_destroy_geom(%s) with consumer(s) [%p]", gp->name, LIST_FIRST(&gp->consumer))); KASSERT(LIST_EMPTY(&gp->provider), ("g_destroy_geom(%s) with provider(s) [%p]", gp->name, LIST_FIRST(&gp->provider))); g_cancel_event(gp); LIST_REMOVE(gp, geom); TAILQ_REMOVE(&geoms, gp, geoms); LIST_FOREACH_SAFE(gap, &gp->aliases, ga_next, gaptmp) g_free(gap); g_free(gp->name); g_free(gp); } /* * This function is called (repeatedly) until the geom has withered away. 
*/ void g_wither_geom(struct g_geom *gp, int error) { struct g_provider *pp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name); if (!(gp->flags & G_GEOM_WITHER)) { gp->flags |= G_GEOM_WITHER; LIST_FOREACH(pp, &gp->provider, provider) if (!(pp->flags & G_PF_ORPHAN)) g_orphan_provider(pp, error); } g_do_wither(); } /* * Convenience function to destroy a particular provider. */ void g_wither_provider(struct g_provider *pp, int error) { pp->flags |= G_PF_WITHER; if (!(pp->flags & G_PF_ORPHAN)) g_orphan_provider(pp, error); } /* * This function is called (repeatedly) until the geom has withered away. */ void g_wither_geom_close(struct g_geom *gp, int error) { struct g_consumer *cp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name); LIST_FOREACH(cp, &gp->consumer, consumer) if (cp->acr || cp->acw || cp->ace) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_wither_geom(gp, error); } /* * This function is called (repeatedly) until we can't wash away more * withered bits at present. */ void g_wither_washer() { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp, *pp2; struct g_consumer *cp, *cp2; g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) { if (!(pp->flags & G_PF_WITHER)) continue; if (LIST_EMPTY(&pp->consumers)) g_destroy_provider(pp); } if (!(gp->flags & G_GEOM_WITHER)) continue; LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) { if (LIST_EMPTY(&pp->consumers)) g_destroy_provider(pp); } LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) { if (cp->acr || cp->acw || cp->ace) continue; if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); } if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) g_destroy_geom(gp); } } } struct g_consumer * g_new_consumer(struct g_geom *gp) { struct g_consumer *cp; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(!(gp->flags & G_GEOM_WITHER), ("g_new_consumer on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); KASSERT(gp->orphan != NULL, ("g_new_consumer on geom(%s) (class %s) without orphan", gp->name, gp->class->name)); cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); cp->geom = gp; cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); LIST_INSERT_HEAD(&gp->consumer, cp, consumer); return(cp); } void g_destroy_consumer(struct g_consumer *cp) { struct g_geom *gp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp); KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached")); KASSERT (cp->acr == 0, ("g_destroy_consumer with acr")); KASSERT (cp->acw == 0, ("g_destroy_consumer with acw")); KASSERT (cp->ace == 0, ("g_destroy_consumer with ace")); g_cancel_event(cp); gp = cp->geom; LIST_REMOVE(cp, consumer); devstat_remove_entry(cp->stat); g_free(cp); if (gp->flags & G_GEOM_WITHER) g_do_wither(); } static void g_new_provider_event(void *arg, int flag) { struct g_class *mp; struct g_provider *pp; struct g_consumer *cp, *next_cp; g_topology_assert(); if (flag == EV_CANCEL) return; if (g_shutdown) return; pp = arg; G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_new_provider_event but withered")); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, next_cp) { if ((cp->flags & G_CF_ORPHAN) == 0 && cp->geom->attrchanged != NULL) cp->geom->attrchanged(cp, "GEOM::media"); } if (g_notaste) return; LIST_FOREACH(mp,
&g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) { struct g_provider *pp; struct sbuf *sb; va_list ap; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(gp->access != NULL, ("new provider on geom(%s) without ->access (class %s)", gp->name, gp->class->name)); KASSERT(gp->start != NULL, ("new provider on geom(%s) without ->start (class %s)", gp->name, gp->class->name)); KASSERT(!(gp->flags & G_GEOM_WITHER), ("new provider on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); pp->name = (char *)(pp + 1); strcpy(pp->name, sbuf_data(sb)); sbuf_delete(sb); LIST_INIT(&pp->consumers); pp->error = ENXIO; pp->geom = gp; pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); LIST_INSERT_HEAD(&gp->provider, pp, provider); g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL); return (pp); } void g_error_provider(struct g_provider *pp, int error) { /* G_VALID_PROVIDER(pp); We may not have g_topology */ pp->error = error; } static void g_resize_provider_event(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp, *cp2; off_t size; g_topology_assert(); if (g_shutdown) return; hh = arg; pp = hh->pp; size = hh->size; g_free(hh); G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_resize_provider_event but withered")); g_trace(G_T_TOPOLOGY, "g_resize_provider_event(%p)", pp); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if (gp->resize == NULL && size < pp->mediasize) { /* * XXX: the g_dev_orphan method does deferred destroying, * and it is possible that another event has already * called the orphan method. Check the consumer's flags * so that we do not schedule it twice. */ if (cp->flags & G_CF_ORPHAN) continue; cp->flags |= G_CF_ORPHAN; cp->geom->orphan(cp); } } pp->mediasize = size; LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if ((gp->flags & G_GEOM_WITHER) == 0 && gp->resize != NULL) gp->resize(cp); } /* * After resizing, the previously invalid GEOM class metadata * might become valid. This means we should retaste.
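In practice the event above is driven from two sides: the provider's owner announces the new media size with g_resize_provider(), defined just below, and consumer classes that can react implement the resize method that g_new_geomf() copies into each geom. A hedged sketch of both halves, with hypothetical names and with locking and error handling elided:

	/* Provider side: the underlying driver noticed a new media size. */
	static void
	example_announce_new_size(struct g_provider *pp, off_t new_size)
	{
		g_resize_provider(pp, new_size);	/* queues g_resize_provider_event() */
	}

	/* Consumer side: wired up as the class's ->resize method. */
	static void
	g_example_resize(struct g_consumer *cp)
	{
		/*
		 * Re-validate our metadata against cp->provider->mediasize and
		 * grow, shrink or wither our own provider(s) accordingly.
		 */
	}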
*/ LIST_FOREACH(mp, &g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } void g_resize_provider(struct g_provider *pp, off_t size) { struct g_hh00 *hh; G_VALID_PROVIDER(pp); if (pp->flags & G_PF_WITHER) return; if (size == pp->mediasize) return; hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->pp = pp; hh->size = size; g_post_event(g_resize_provider_event, hh, M_WAITOK, NULL); } #ifndef _PATH_DEV #define _PATH_DEV "/dev/" #endif struct g_provider * g_provider_by_name(char const *arg) { struct g_class *cp; struct g_geom *gp; struct g_provider *pp, *wpp; if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) arg += sizeof(_PATH_DEV) - 1; wpp = NULL; LIST_FOREACH(cp, &g_classes, class) { LIST_FOREACH(gp, &cp->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (strcmp(arg, pp->name) != 0) continue; if ((gp->flags & G_GEOM_WITHER) == 0 && (pp->flags & G_PF_WITHER) == 0) return (pp); else wpp = pp; } } } return (wpp); } void g_destroy_provider(struct g_provider *pp) { struct g_geom *gp; g_topology_assert(); G_VALID_PROVIDER(pp); KASSERT(LIST_EMPTY(&pp->consumers), ("g_destroy_provider but attached")); KASSERT (pp->acr == 0, ("g_destroy_provider with acr")); KASSERT (pp->acw == 0, ("g_destroy_provider with acw")); KASSERT (pp->ace == 0, ("g_destroy_provider with ace")); g_cancel_event(pp); LIST_REMOVE(pp, provider); gp = pp->geom; devstat_remove_entry(pp->stat); /* * If a callback was provided, send notification that the provider * is now gone. */ if (gp->providergone != NULL) gp->providergone(pp); g_free(pp); if ((gp->flags & G_GEOM_WITHER)) g_do_wither(); } /* * We keep the "geoms" list sorted by topological order (== increasing * numerical rank) at all times. * When an attach is done, the attaching geom's rank is invalidated * and it is moved to the tail of the list. * All geoms later in the sequence have their ranks reevaluated in * sequence. If we cannot assign rank to a geom because its * prerequisites do not have rank, we move that element to the tail * of the sequence with invalid rank as well. * At some point we encounter our original geom and if we still fail * to assign it a rank, there must be a loop and we fail back to * g_attach() which detaches again and calls redo_rank again * to fix up the damage. * It would be much simpler code-wise to do it recursively, but we * can't risk that on the kernel stack.
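The rule that redo_rank() below enforces is simple even though the list surgery around it is not: a geom that consumes from nobody gets rank 1, otherwise its rank is one more than the highest rank among the geoms it consumes from, and 0 means "not computable yet". A chain such as disk -> partition -> label therefore ends up ranked 1, 2, 3. The core of that calculation, pulled out as an illustrative helper (not part of the source):

	static int
	example_rank_of(struct g_geom *gp)
	{
		struct g_consumer *cp;
		int m, n;

		m = 1;				/* consumes from nobody: a root, rank 1 */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == NULL)
				continue;
			n = cp->provider->geom->rank;
			if (n == 0)
				return (0);	/* prerequisite not ranked yet */
			if (n >= m)
				m = n + 1;
		}
		return (m);
	}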
*/ static int redo_rank(struct g_geom *gp) { struct g_consumer *cp; struct g_geom *gp1, *gp2; int n, m; g_topology_assert(); G_VALID_GEOM(gp); /* Invalidate this geoms rank and move it to the tail */ gp1 = TAILQ_NEXT(gp, geoms); if (gp1 != NULL) { gp->rank = 0; TAILQ_REMOVE(&geoms, gp, geoms); TAILQ_INSERT_TAIL(&geoms, gp, geoms); } else { gp1 = gp; } /* re-rank the rest of the sequence */ for (; gp1 != NULL; gp1 = gp2) { gp1->rank = 0; m = 1; LIST_FOREACH(cp, &gp1->consumer, consumer) { if (cp->provider == NULL) continue; n = cp->provider->geom->rank; if (n == 0) { m = 0; break; } else if (n >= m) m = n + 1; } gp1->rank = m; gp2 = TAILQ_NEXT(gp1, geoms); /* got a rank, moving on */ if (m != 0) continue; /* no rank to original geom means loop */ if (gp == gp1) return (ELOOP); /* no rank, put it at the end move on */ TAILQ_REMOVE(&geoms, gp1, geoms); TAILQ_INSERT_TAIL(&geoms, gp1, geoms); } return (0); } int g_attach(struct g_consumer *cp, struct g_provider *pp) { int error; g_topology_assert(); G_VALID_CONSUMER(cp); G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "g_attach(%p, %p)", cp, pp); KASSERT(cp->provider == NULL, ("attach but attached")); cp->provider = pp; cp->flags &= ~G_CF_ORPHAN; LIST_INSERT_HEAD(&pp->consumers, cp, consumers); error = redo_rank(cp->geom); if (error) { LIST_REMOVE(cp, consumers); cp->provider = NULL; redo_rank(cp->geom); } return (error); } void g_detach(struct g_consumer *cp) { struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp); KASSERT(cp->provider != NULL, ("detach but not attached")); KASSERT(cp->acr == 0, ("detach but nonzero acr")); KASSERT(cp->acw == 0, ("detach but nonzero acw")); KASSERT(cp->ace == 0, ("detach but nonzero ace")); KASSERT(cp->nstart == cp->nend, ("detach with active requests")); pp = cp->provider; LIST_REMOVE(cp, consumers); cp->provider = NULL; if ((cp->geom->flags & G_GEOM_WITHER) || (pp->geom->flags & G_GEOM_WITHER) || (pp->flags & G_PF_WITHER)) g_do_wither(); redo_rank(cp->geom); } /* * g_access() * * Access-check with delta values. The question asked is "can provider * "cp" change the access counters by the relative amounts dc[rwe] ?" */ int g_access(struct g_consumer *cp, int dcr, int dcw, int dce) { struct g_provider *pp; struct g_geom *gp; int pw, pe; #ifdef INVARIANTS int sr, sw, se; #endif int error; g_topology_assert(); G_VALID_CONSUMER(cp); pp = cp->provider; KASSERT(pp != NULL, ("access but not attached")); G_VALID_PROVIDER(pp); gp = pp->geom; g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)", cp, pp->name, dcr, dcw, dce); KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr")); KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw")); KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace")); KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request")); KASSERT(gp->access != NULL, ("NULL geom->access")); /* * If our class cares about being spoiled, and we have been, we * are probably just ahead of the event telling us that. Fail * now rather than having to unravel this later. */ if (cp->geom->spoiled != NULL && (cp->flags & G_CF_SPOILED) && (dcr > 0 || dcw > 0 || dce > 0)) return (ENXIO); /* * A number of GEOM classes either need to perform an I/O on the first * open or to acquire a different subsystem's lock. To do that they * may have to drop the topology lock. * Other GEOM classes perform special actions when opening a lower rank * geom for the first time. As a result, more than one thread may * end up performing the special actions. 
* So, we prevent concurrent "first" opens by marking the consumer with * special flag. * * Note that if the geom's access method never drops the topology lock, * then we will never see G_GEOM_IN_ACCESS here. */ while ((gp->flags & G_GEOM_IN_ACCESS) != 0) { g_trace(G_T_ACCESS, "%s: race on geom %s via provider %s and consumer of %s", __func__, gp->name, pp->name, cp->geom->name); gp->flags |= G_GEOM_ACCESS_WAIT; g_topology_sleep(gp, 0); } /* * Figure out what counts the provider would have had, if this * consumer had (r0w0e0) at this time. */ pw = pp->acw - cp->acw; pe = pp->ace - cp->ace; g_trace(G_T_ACCESS, "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)", dcr, dcw, dce, cp->acr, cp->acw, cp->ace, pp->acr, pp->acw, pp->ace, pp, pp->name); /* If foot-shooting is enabled, any open on rank#1 is OK */ if ((g_debugflags & 16) && gp->rank == 1) ; /* If we try exclusive but already write: fail */ else if (dce > 0 && pw > 0) return (EPERM); /* If we try write but already exclusive: fail */ else if (dcw > 0 && pe > 0) return (EPERM); /* If we try to open more but provider is error'ed: fail */ else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0) { printf("%s(%d): provider %s has error %d set\n", __func__, __LINE__, pp->name, pp->error); return (pp->error); } /* Ok then... */ #ifdef INVARIANTS sr = cp->acr; sw = cp->acw; se = cp->ace; #endif gp->flags |= G_GEOM_IN_ACCESS; error = gp->access(pp, dcr, dcw, dce); KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0, ("Geom provider %s::%s dcr=%d dcw=%d dce=%d error=%d failed " "closing ->access()", gp->class->name, pp->name, dcr, dcw, dce, error)); g_topology_assert(); gp->flags &= ~G_GEOM_IN_ACCESS; KASSERT(cp->acr == sr && cp->acw == sw && cp->ace == se, ("Access counts changed during geom->access")); if ((gp->flags & G_GEOM_ACCESS_WAIT) != 0) { gp->flags &= ~G_GEOM_ACCESS_WAIT; wakeup(gp); } if (!error) { /* * If we open first write, spoil any partner consumers. * If we close last write and provider is not errored, * trigger re-taste. 
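As a usage note, consumers drive all of this with small relative deltas and give back exactly what they took when closing. A hedged sketch of the typical call pattern (topology lock held by the caller; hypothetical helper names, not code from this commit):

	/* Open read-only: +1 read, no write, no exclusive. */
	static int
	example_open_ro(struct g_consumer *cp)
	{
		return (g_access(cp, 1, 0, 0));
	}

	/*
	 * Upgrade to read-write plus the exclusive bit; this can fail with
	 * EPERM if another consumer already holds write (when asking for +e)
	 * or exclusive (when asking for +w) counts on the provider.
	 */
	static int
	example_upgrade_rw(struct g_consumer *cp)
	{
		return (g_access(cp, 0, 1, 1));
	}

	/* Close: drop whatever this consumer still holds. */
	static void
	example_close(struct g_consumer *cp)
	{
		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
			(void)g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	}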
*/ if (pp->acw == 0 && dcw != 0) g_spoil(pp, cp); else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 && !(gp->flags & G_GEOM_WITHER)) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); pp->acr += dcr; pp->acw += dcw; pp->ace += dce; cp->acr += dcr; cp->acw += dcw; cp->ace += dce; if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) KASSERT(pp->sectorsize > 0, ("Provider %s lacks sectorsize", pp->name)); if ((cp->geom->flags & G_GEOM_WITHER) && cp->acr == 0 && cp->acw == 0 && cp->ace == 0) g_do_wither(); } return (error); } int g_handleattr_int(struct bio *bp, const char *attribute, int val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_str(struct bio *bp, const char *attribute, const char *str) { return (g_handleattr(bp, attribute, str, 0)); } int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len) { int error = 0; if (strcmp(bp->bio_attribute, attribute)) return (0); if (len == 0) { bzero(bp->bio_data, bp->bio_length); if (strlcpy(bp->bio_data, val, bp->bio_length) >= bp->bio_length) { printf("%s: %s %s bio_length %jd strlen %zu -> EFAULT\n", __func__, bp->bio_to->name, attribute, (intmax_t)bp->bio_length, strlen(val)); error = EFAULT; } } else if (bp->bio_length == len) { bcopy(val, bp->bio_data, len); } else { printf("%s: %s %s bio_length %jd len %d -> EFAULT\n", __func__, bp->bio_to->name, attribute, (intmax_t)bp->bio_length, len); error = EFAULT; } if (error == 0) bp->bio_completed = bp->bio_length; g_io_deliver(bp, error); return (1); } int g_std_access(struct g_provider *pp, int dr __unused, int dw __unused, int de __unused) { g_topology_assert(); G_VALID_PROVIDER(pp); return (0); } void g_std_done(struct bio *bp) { struct bio *bp2; bp2 = bp->bio_parent; if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; g_destroy_bio(bp); bp2->bio_inbed++; if (bp2->bio_children == bp2->bio_inbed) g_io_deliver(bp2, bp2->bio_error); } /* XXX: maybe this is only g_slice_spoiled */ void g_std_spoiled(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp); cp->flags |= G_CF_ORPHAN; g_detach(cp); gp = cp->geom; LIST_FOREACH(pp, &gp->provider, provider) g_orphan_provider(pp, ENXIO); g_destroy_consumer(cp); if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) g_destroy_geom(gp); else gp->flags |= G_GEOM_WITHER; } /* * Spoiling happens when a provider is opened for writing, but consumers * which are configured by in-band data are attached (slicers for instance). * Since the write might potentially change the in-band data, such consumers * need to re-evaluate their existence after the writing session closes. * We do this by (offering to) tear them down when the open for write happens * in return for a re-taste when it closes again. * Together with the fact that such consumers grab an 'e' bit whenever they * are open, regardless of mode, this ends up DTRT. 
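The contract described above is what the class's spoiled method exists for. A class that configures itself from in-band metadata typically either points it at g_std_spoiled() or wraps it; a hypothetical sketch (not part of this change):

	static void
	g_example_spoiled(struct g_consumer *cp)
	{
		/*
		 * Runs from g_spoil_event() in the event thread, topology lock
		 * held, after another consumer opened our provider for
		 * writing; the stock reaction tears this consumer down and the
		 * class gets re-tasted once the writer closes.
		 */
		g_std_spoiled(cp);
	}

	static struct g_class g_example_slicer_class = {
		.name = "EXAMPLESLICER",
		.version = G_VERSION,
		.spoiled = g_example_spoiled,	/* or g_std_spoiled directly */
	};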
*/ static void g_spoil_event(void *arg, int flag) { struct g_provider *pp; struct g_consumer *cp, *cp2; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "%s %p(%s:%s:%s)", __func__, pp, pp->geom->class->name, pp->geom->name, pp->name); for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) { cp2 = LIST_NEXT(cp, consumers); if ((cp->flags & G_CF_SPOILED) == 0) continue; cp->flags &= ~G_CF_SPOILED; if (cp->geom->spoiled == NULL) continue; cp->geom->spoiled(cp); g_topology_assert(); } } void g_spoil(struct g_provider *pp, struct g_consumer *cp) { struct g_consumer *cp2; g_topology_assert(); G_VALID_PROVIDER(pp); G_VALID_CONSUMER(cp); LIST_FOREACH(cp2, &pp->consumers, consumers) { if (cp2 == cp) continue; /* KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr)); KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw)); */ KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace)); cp2->flags |= G_CF_SPOILED; } g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL); } static void g_media_changed_event(void *arg, int flag) { struct g_provider *pp; int retaste; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); /* * If provider was not open for writing, queue retaste after spoiling. * If it was, retaste will happen automatically on close. */ retaste = (pp->acw == 0 && pp->error == 0 && !(pp->geom->flags & G_GEOM_WITHER)); g_spoil_event(arg, flag); if (retaste) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); } int g_media_changed(struct g_provider *pp, int flag) { struct g_consumer *cp; LIST_FOREACH(cp, &pp->consumers, consumers) cp->flags |= G_CF_SPOILED; return (g_post_event(g_media_changed_event, pp, flag, pp, NULL)); } int g_media_gone(struct g_provider *pp, int flag) { struct g_consumer *cp; LIST_FOREACH(cp, &pp->consumers, consumers) cp->flags |= G_CF_SPOILED; return (g_post_event(g_spoil_event, pp, flag, pp, NULL)); } int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len) { int error, i; i = len; error = g_io_getattr(attr, cp, &i, var); if (error) return (error); if (i != len) return (EINVAL); return (0); } static int g_get_device_prefix_len(const char *name) { int len; if (strncmp(name, "ada", 3) == 0) len = 3; else if (strncmp(name, "ad", 2) == 0) len = 2; else return (0); if (name[len] < '0' || name[len] > '9') return (0); do { len++; } while (name[len] >= '0' && name[len] <= '9'); return (len); } int g_compare_names(const char *namea, const char *nameb) { int deva, devb; if (strcmp(namea, nameb) == 0) return (1); deva = g_get_device_prefix_len(namea); if (deva == 0) return (0); devb = g_get_device_prefix_len(nameb); if (devb == 0) return (0); if (strcmp(namea + deva, nameb + devb) == 0) return (1); return (0); } void g_geom_add_alias(struct g_geom *gp, const char *alias) { struct g_geom_alias *gap; gap = (struct g_geom_alias *)g_malloc( sizeof(struct g_geom_alias) + strlen(alias) + 1, M_WAITOK); strcpy((char *)(gap + 1), alias); gap->ga_alias = (const char *)(gap + 1); LIST_INSERT_HEAD(&gp->aliases, gap, ga_next); } #if defined(DIAGNOSTIC) || defined(DDB) /* * This function walks the mesh and returns a non-zero integer if it * finds the argument pointer is an object. The return value indicates * which type of object it is believed to be. If topology is not locked, * this function is potentially dangerous, but we don't assert that the * topology lock is held when called from debugger. 
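Since g_valid_obj() below exists mainly so the DDB commands defined further down can safely be pointed at arbitrary addresses, the debugger-side usage is worth noting. Assuming a kernel with DDB, the commands this file provides are (illustrative invocations, output omitted):

	db> show geom            (no address: print the entire topology)
	db> show geom <addr>     (classify the address and print that one object)
	db> show bio <addr>      (dump a single struct bio)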
*/ int g_valid_obj(void const *ptr) { struct g_class *mp; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; #ifdef KDB if (kdb_active == 0) #endif g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { if (ptr == mp) return (1); LIST_FOREACH(gp, &mp->geom, geom) { if (ptr == gp) return (2); LIST_FOREACH(cp, &gp->consumer, consumer) if (ptr == cp) return (3); LIST_FOREACH(pp, &gp->provider, provider) if (ptr == pp) return (4); } } return(0); } #endif #ifdef DDB #define gprintf(...) do { \ db_printf("%*s", indent, ""); \ db_printf(__VA_ARGS__); \ } while (0) #define gprintln(...) do { \ gprintf(__VA_ARGS__); \ db_printf("\n"); \ } while (0) #define ADDFLAG(obj, flag, sflag) do { \ if ((obj)->flags & (flag)) { \ if (comma) \ strlcat(str, ",", size); \ strlcat(str, (sflag), size); \ comma = 1; \ } \ } while (0) static char * provider_flags_to_string(struct g_provider *pp, char *str, size_t size) { int comma = 0; bzero(str, size); if (pp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER"); ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN"); return (str); } static char * geom_flags_to_string(struct g_geom *gp, char *str, size_t size) { int comma = 0; bzero(str, size); if (gp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER"); return (str); } static void db_show_geom_consumer(int indent, struct g_consumer *cp) { if (indent == 0) { gprintln("consumer: %p", cp); gprintln(" class: %s (%p)", cp->geom->class->name, cp->geom->class); gprintln(" geom: %s (%p)", cp->geom->name, cp->geom); if (cp->provider == NULL) gprintln(" provider: none"); else { gprintln(" provider: %s (%p)", cp->provider->name, cp->provider); } gprintln(" access: r%dw%de%d", cp->acr, cp->acw, cp->ace); gprintln(" flags: 0x%04x", cp->flags); gprintln(" nstart: %u", cp->nstart); gprintln(" nend: %u", cp->nend); } else { gprintf("consumer: %p (%s), access=r%dw%de%d", cp, cp->provider != NULL ? 
cp->provider->name : "none", cp->acr, cp->acw, cp->ace); if (cp->flags) db_printf(", flags=0x%04x", cp->flags); db_printf("\n"); } } static void db_show_geom_provider(int indent, struct g_provider *pp) { struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("provider: %s (%p)", pp->name, pp); gprintln(" class: %s (%p)", pp->geom->class->name, pp->geom->class); gprintln(" geom: %s (%p)", pp->geom->name, pp->geom); gprintln(" mediasize: %jd", (intmax_t)pp->mediasize); gprintln(" sectorsize: %u", pp->sectorsize); gprintln(" stripesize: %ju", (uintmax_t)pp->stripesize); gprintln(" stripeoffset: %ju", (uintmax_t)pp->stripeoffset); gprintln(" access: r%dw%de%d", pp->acr, pp->acw, pp->ace); gprintln(" flags: %s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); gprintln(" error: %d", pp->error); gprintln(" nstart: %u", pp->nstart); gprintln(" nend: %u", pp->nend); if (LIST_EMPTY(&pp->consumers)) gprintln(" consumers: none"); } else { gprintf("provider: %s (%p), access=r%dw%de%d", pp->name, pp, pp->acr, pp->acw, pp->ace); if (pp->flags != 0) { db_printf(", flags=%s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&pp->consumers)) { LIST_FOREACH(cp, &pp->consumers, consumers) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_geom(int indent, struct g_geom *gp) { struct g_provider *pp; struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("geom: %s (%p)", gp->name, gp); gprintln(" class: %s (%p)", gp->class->name, gp->class); gprintln(" flags: %s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); gprintln(" rank: %d", gp->rank); if (LIST_EMPTY(&gp->provider)) gprintln(" providers: none"); if (LIST_EMPTY(&gp->consumer)) gprintln(" consumers: none"); } else { gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank); if (gp->flags != 0) { db_printf(", flags=%s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&gp->provider)) { LIST_FOREACH(pp, &gp->provider, provider) { db_show_geom_provider(indent + 2, pp); if (db_pager_quit) break; } } if (!LIST_EMPTY(&gp->consumer)) { LIST_FOREACH(cp, &gp->consumer, consumer) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_class(struct g_class *mp) { struct g_geom *gp; db_printf("class: %s (%p)\n", mp->name, mp); LIST_FOREACH(gp, &mp->geom, geom) { db_show_geom_geom(2, gp); if (db_pager_quit) break; } } /* * Print the GEOM topology or the given object. */ DB_SHOW_COMMAND(geom, db_show_geom) { struct g_class *mp; if (!have_addr) { /* No address given, print the entire topology. 
*/ LIST_FOREACH(mp, &g_classes, class) { db_show_geom_class(mp); db_printf("\n"); if (db_pager_quit) break; } } else { switch (g_valid_obj((void *)addr)) { case 1: db_show_geom_class((struct g_class *)addr); break; case 2: db_show_geom_geom(0, (struct g_geom *)addr); break; case 3: db_show_geom_consumer(0, (struct g_consumer *)addr); break; case 4: db_show_geom_provider(0, (struct g_provider *)addr); break; default: db_printf("Not a GEOM object.\n"); break; } } } static void db_print_bio_cmd(struct bio *bp) { db_printf(" cmd: "); switch (bp->bio_cmd) { case BIO_READ: db_printf("BIO_READ"); break; case BIO_WRITE: db_printf("BIO_WRITE"); break; case BIO_DELETE: db_printf("BIO_DELETE"); break; case BIO_GETATTR: db_printf("BIO_GETATTR"); break; case BIO_FLUSH: db_printf("BIO_FLUSH"); break; case BIO_CMD0: db_printf("BIO_CMD0"); break; case BIO_CMD1: db_printf("BIO_CMD1"); break; case BIO_CMD2: db_printf("BIO_CMD2"); break; case BIO_ZONE: db_printf("BIO_ZONE"); break; default: db_printf("UNKNOWN"); break; } db_printf("\n"); } static void db_print_bio_flags(struct bio *bp) { int comma; comma = 0; db_printf(" flags: "); if (bp->bio_flags & BIO_ERROR) { db_printf("BIO_ERROR"); comma = 1; } if (bp->bio_flags & BIO_DONE) { db_printf("%sBIO_DONE", (comma ? ", " : "")); comma = 1; } if (bp->bio_flags & BIO_ONQUEUE) db_printf("%sBIO_ONQUEUE", (comma ? ", " : "")); db_printf("\n"); } /* * Print useful information in a BIO */ DB_SHOW_COMMAND(bio, db_show_bio) { struct bio *bp; if (have_addr) { bp = (struct bio *)addr; db_printf("BIO %p\n", bp); db_print_bio_cmd(bp); db_print_bio_flags(bp); db_printf(" cflags: 0x%hx\n", bp->bio_cflags); db_printf(" pflags: 0x%hx\n", bp->bio_pflags); db_printf(" offset: %jd\n", (intmax_t)bp->bio_offset); db_printf(" length: %jd\n", (intmax_t)bp->bio_length); db_printf(" bcount: %ld\n", bp->bio_bcount); db_printf(" resid: %ld\n", bp->bio_resid); db_printf(" completed: %jd\n", (intmax_t)bp->bio_completed); db_printf(" children: %u\n", bp->bio_children); db_printf(" inbed: %u\n", bp->bio_inbed); db_printf(" error: %d\n", bp->bio_error); db_printf(" parent: %p\n", bp->bio_parent); db_printf(" driver1: %p\n", bp->bio_driver1); db_printf(" driver2: %p\n", bp->bio_driver2); db_printf(" caller1: %p\n", bp->bio_caller1); db_printf(" caller2: %p\n", bp->bio_caller2); db_printf(" bio_from: %p\n", bp->bio_from); db_printf(" bio_to: %p\n", bp->bio_to); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) db_printf(" bio_track_bp: %p\n", bp->bio_track_bp); #endif } } #undef gprintf #undef gprintln #undef ADDFLAG #endif /* DDB */ Index: head/sys/geom/geom_vfs.c =================================================================== --- head/sys/geom/geom_vfs.c (revision 350693) +++ head/sys/geom/geom_vfs.c (revision 350694) @@ -1,290 +1,289 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Poul-Henning Kamp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include +#include #include #include #include #include /* * subroutines for use by filesystems. * * XXX: should maybe live somewhere else ? */ #include struct g_vfs_softc { struct mtx sc_mtx; struct bufobj *sc_bo; int sc_active; int sc_orphaned; }; static struct buf_ops __g_vfs_bufops = { .bop_name = "GEOM_VFS", .bop_write = bufwrite, .bop_strategy = g_vfs_strategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush }; struct buf_ops *g_vfs_bufops = &__g_vfs_bufops; static g_orphan_t g_vfs_orphan; static struct g_class g_vfs_class = { .name = "VFS", .version = G_VERSION, .orphan = g_vfs_orphan, }; DECLARE_GEOM_CLASS(g_vfs_class, g_vfs); static void g_vfs_destroy(void *arg, int flags __unused) { struct g_consumer *cp; g_topology_assert(); cp = arg; if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); if (cp->geom->softc == NULL) g_wither_geom(cp->geom, ENXIO); } static void g_vfs_done(struct bio *bip) { struct g_consumer *cp; struct g_vfs_softc *sc; struct buf *bp; int destroy; struct mount *mp; struct vnode *vp; struct cdev *cdevp; /* * Collect statistics on synchronous and asynchronous read * and write counts for disks that have associated filesystems. */ bp = bip->bio_caller2; vp = bp->b_vp; if (vp != NULL) { /* * If not a disk vnode, use its associated mount point * otherwise use the mountpoint associated with the disk. 
*/ VI_LOCK(vp); if (vp->v_type != VCHR || (cdevp = vp->v_rdev) == NULL || cdevp->si_devsw == NULL || (cdevp->si_devsw->d_flags & D_DISK) == 0) mp = vp->v_mount; else mp = cdevp->si_mountpt; if (mp != NULL) { if (bp->b_iocmd == BIO_READ) { if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) mp->mnt_stat.f_asyncreads++; else mp->mnt_stat.f_syncreads++; } else if (bp->b_iocmd == BIO_WRITE) { if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) mp->mnt_stat.f_asyncwrites++; else mp->mnt_stat.f_syncwrites++; } } VI_UNLOCK(vp); } cp = bip->bio_from; sc = cp->geom->softc; - if (bip->bio_error) { - printf("g_vfs_done():"); - g_print_bio(bip); - printf("error = %d\n", bip->bio_error); - } + if (bip->bio_error) + g_print_bio("g_vfs_done():", bip, "error = %d", + bip->bio_error); bp->b_error = bip->bio_error; bp->b_ioflags = bip->bio_flags; if (bip->bio_error) bp->b_ioflags |= BIO_ERROR; bp->b_resid = bp->b_bcount - bip->bio_completed; g_destroy_bio(bip); mtx_lock(&sc->sc_mtx); destroy = ((--sc->sc_active) == 0 && sc->sc_orphaned); mtx_unlock(&sc->sc_mtx); if (destroy) g_post_event(g_vfs_destroy, cp, M_WAITOK, NULL); bufdone(bp); } void g_vfs_strategy(struct bufobj *bo, struct buf *bp) { struct g_vfs_softc *sc; struct g_consumer *cp; struct bio *bip; cp = bo->bo_private; sc = cp->geom->softc; /* * If the provider has orphaned us, just return ENXIO. */ mtx_lock(&sc->sc_mtx); if (sc->sc_orphaned) { mtx_unlock(&sc->sc_mtx); bp->b_error = ENXIO; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } sc->sc_active++; mtx_unlock(&sc->sc_mtx); bip = g_alloc_bio(); bip->bio_cmd = bp->b_iocmd; bip->bio_offset = bp->b_iooffset; bip->bio_length = bp->b_bcount; bdata2bio(bp, bip); if ((bp->b_flags & B_BARRIER) != 0) { bip->bio_flags |= BIO_ORDERED; bp->b_flags &= ~B_BARRIER; } bip->bio_done = g_vfs_done; bip->bio_caller2 = bp; #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) buf_track(bp, __func__); bip->bio_track_bp = bp; #endif g_io_request(bip, cp); } static void g_vfs_orphan(struct g_consumer *cp) { struct g_geom *gp; struct g_vfs_softc *sc; int destroy; g_topology_assert(); gp = cp->geom; g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name); sc = gp->softc; if (sc == NULL) return; mtx_lock(&sc->sc_mtx); sc->sc_orphaned = 1; destroy = (sc->sc_active == 0); mtx_unlock(&sc->sc_mtx); if (destroy) g_vfs_destroy(cp, 0); /* * Do not destroy the geom. Filesystem will do that during unmount. 
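The change to g_vfs_done() above uses the g_print_bio() calling convention introduced earlier in this commit: a prefix string, the bio, and a printf-style suffix, all emitted as a single line. Another class's completion path could use it the same way; a hypothetical sketch:

	static void
	g_example_done(struct bio *bp)
	{
		if (bp->bio_error != 0)
			g_print_bio("g_example_done():", bp, "error = %d",
			    bp->bio_error);
		g_std_done(bp);
	}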
*/ } int g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr) { struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; struct g_vfs_softc *sc; struct bufobj *bo; int error; g_topology_assert(); *cpp = NULL; bo = &vp->v_bufobj; if (bo->bo_private != vp) return (EBUSY); pp = g_dev_getprovider(vp->v_rdev); if (pp == NULL) return (ENOENT); gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "g_vfs", NULL, MTX_DEF); sc->sc_bo = bo; gp->softc = sc; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_access(cp, 1, wr, wr); if (error) { g_wither_geom(gp, ENXIO); return (error); } vnode_create_vobject(vp, pp->mediasize, curthread); *cpp = cp; cp->private = vp; cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; bo->bo_ops = g_vfs_bufops; bo->bo_private = cp; bo->bo_bsize = pp->sectorsize; return (error); } void g_vfs_close(struct g_consumer *cp) { struct g_geom *gp; struct g_vfs_softc *sc; g_topology_assert(); gp = cp->geom; sc = gp->softc; bufobj_invalbuf(sc->sc_bo, V_SAVE, 0, 0); sc->sc_bo->bo_private = cp->private; gp->softc = NULL; mtx_destroy(&sc->sc_mtx); if (!sc->sc_orphaned || cp->provider == NULL) g_wither_geom_close(gp, ENXIO); g_free(sc); } Index: head/sys/geom/journal/g_journal.c =================================================================== --- head/sys/geom/journal/g_journal.c (revision 350693) +++ head/sys/geom/journal/g_journal.c (revision 350694) @@ -1,3013 +1,3014 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef GJ_MEMDEBUG #include #include #endif #include #include #include +#include #include FEATURE(geom_journal, "GEOM journaling support"); /* * On-disk journal format: * * JH - Journal header * RH - Record header * * %%%%%% ****** +------+ +------+ ****** +------+ %%%%%% * % JH % * RH * | Data | | Data | ... * RH * | Data | ... % JH % ... 
* %%%%%% ****** +------+ +------+ ****** +------+ %%%%%% * */ CTASSERT(sizeof(struct g_journal_header) <= 512); CTASSERT(sizeof(struct g_journal_record_header) <= 512); static MALLOC_DEFINE(M_JOURNAL, "journal_data", "GEOM_JOURNAL Data"); static struct mtx g_journal_cache_mtx; MTX_SYSINIT(g_journal_cache, &g_journal_cache_mtx, "cache usage", MTX_DEF); const struct g_journal_desc *g_journal_filesystems[] = { &g_journal_ufs, NULL }; SYSCTL_DECL(_kern_geom); int g_journal_debug = 0; static u_int g_journal_switch_time = 10; static u_int g_journal_force_switch = 70; static u_int g_journal_parallel_flushes = 16; static u_int g_journal_parallel_copies = 16; static u_int g_journal_accept_immediately = 64; static u_int g_journal_record_entries = GJ_RECORD_HEADER_NENTRIES; static u_int g_journal_do_optimize = 1; static SYSCTL_NODE(_kern_geom, OID_AUTO, journal, CTLFLAG_RW, 0, "GEOM_JOURNAL stuff"); SYSCTL_INT(_kern_geom_journal, OID_AUTO, debug, CTLFLAG_RWTUN, &g_journal_debug, 0, "Debug level"); SYSCTL_UINT(_kern_geom_journal, OID_AUTO, switch_time, CTLFLAG_RW, &g_journal_switch_time, 0, "Switch journals every N seconds"); SYSCTL_UINT(_kern_geom_journal, OID_AUTO, force_switch, CTLFLAG_RW, &g_journal_force_switch, 0, "Force switch when journal is N% full"); SYSCTL_UINT(_kern_geom_journal, OID_AUTO, parallel_flushes, CTLFLAG_RW, &g_journal_parallel_flushes, 0, "Number of flush I/O requests to send in parallel"); SYSCTL_UINT(_kern_geom_journal, OID_AUTO, accept_immediately, CTLFLAG_RW, &g_journal_accept_immediately, 0, "Number of I/O requests accepted immediately"); SYSCTL_UINT(_kern_geom_journal, OID_AUTO, parallel_copies, CTLFLAG_RW, &g_journal_parallel_copies, 0, "Number of copy I/O requests to send in parallel"); static int g_journal_record_entries_sysctl(SYSCTL_HANDLER_ARGS) { u_int entries; int error; entries = g_journal_record_entries; error = sysctl_handle_int(oidp, &entries, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (entries < 1 || entries > GJ_RECORD_HEADER_NENTRIES) return (EINVAL); g_journal_record_entries = entries; return (0); } SYSCTL_PROC(_kern_geom_journal, OID_AUTO, record_entries, CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, g_journal_record_entries_sysctl, "I", "Maximum number of entires in one journal record"); SYSCTL_UINT(_kern_geom_journal, OID_AUTO, optimize, CTLFLAG_RW, &g_journal_do_optimize, 0, "Try to combine bios on flush and copy"); static u_long g_journal_cache_used = 0; static u_long g_journal_cache_limit = 64 * 1024 * 1024; static u_int g_journal_cache_divisor = 2; static u_int g_journal_cache_switch = 90; static u_int g_journal_cache_misses = 0; static u_int g_journal_cache_alloc_failures = 0; static u_long g_journal_cache_low = 0; static SYSCTL_NODE(_kern_geom_journal, OID_AUTO, cache, CTLFLAG_RW, 0, "GEOM_JOURNAL cache"); SYSCTL_ULONG(_kern_geom_journal_cache, OID_AUTO, used, CTLFLAG_RD, &g_journal_cache_used, 0, "Number of allocated bytes"); static int g_journal_cache_limit_sysctl(SYSCTL_HANDLER_ARGS) { u_long limit; int error; limit = g_journal_cache_limit; error = sysctl_handle_long(oidp, &limit, 0, req); if (error != 0 || req->newptr == NULL) return (error); g_journal_cache_limit = limit; g_journal_cache_low = (limit / 100) * g_journal_cache_switch; return (0); } SYSCTL_PROC(_kern_geom_journal_cache, OID_AUTO, limit, CTLTYPE_ULONG | CTLFLAG_RWTUN, NULL, 0, g_journal_cache_limit_sysctl, "I", "Maximum number of allocated bytes"); SYSCTL_UINT(_kern_geom_journal_cache, OID_AUTO, divisor, CTLFLAG_RDTUN, &g_journal_cache_divisor, 0, "(kmem_size / 
kern.geom.journal.cache.divisor) == cache size"); static int g_journal_cache_switch_sysctl(SYSCTL_HANDLER_ARGS) { u_int cswitch; int error; cswitch = g_journal_cache_switch; error = sysctl_handle_int(oidp, &cswitch, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (cswitch > 100) return (EINVAL); g_journal_cache_switch = cswitch; g_journal_cache_low = (g_journal_cache_limit / 100) * cswitch; return (0); } SYSCTL_PROC(_kern_geom_journal_cache, OID_AUTO, switch, CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, g_journal_cache_switch_sysctl, "I", "Force switch when we hit this percent of cache use"); SYSCTL_UINT(_kern_geom_journal_cache, OID_AUTO, misses, CTLFLAG_RW, &g_journal_cache_misses, 0, "Number of cache misses"); SYSCTL_UINT(_kern_geom_journal_cache, OID_AUTO, alloc_failures, CTLFLAG_RW, &g_journal_cache_alloc_failures, 0, "Memory allocation failures"); static u_long g_journal_stats_bytes_skipped = 0; static u_long g_journal_stats_combined_ios = 0; static u_long g_journal_stats_switches = 0; static u_long g_journal_stats_wait_for_copy = 0; static u_long g_journal_stats_journal_full = 0; static u_long g_journal_stats_low_mem = 0; static SYSCTL_NODE(_kern_geom_journal, OID_AUTO, stats, CTLFLAG_RW, 0, "GEOM_JOURNAL statistics"); SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, skipped_bytes, CTLFLAG_RW, &g_journal_stats_bytes_skipped, 0, "Number of skipped bytes"); SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, combined_ios, CTLFLAG_RW, &g_journal_stats_combined_ios, 0, "Number of combined I/O requests"); SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, switches, CTLFLAG_RW, &g_journal_stats_switches, 0, "Number of journal switches"); SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, wait_for_copy, CTLFLAG_RW, &g_journal_stats_wait_for_copy, 0, "Wait for journal copy on switch"); SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, journal_full, CTLFLAG_RW, &g_journal_stats_journal_full, 0, "Number of times journal was almost full."); SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, low_mem, CTLFLAG_RW, &g_journal_stats_low_mem, 0, "Number of times low_mem hook was called."); static g_taste_t g_journal_taste; static g_ctl_req_t g_journal_config; static g_dumpconf_t g_journal_dumpconf; static g_init_t g_journal_init; static g_fini_t g_journal_fini; struct g_class g_journal_class = { .name = G_JOURNAL_CLASS_NAME, .version = G_VERSION, .taste = g_journal_taste, .ctlreq = g_journal_config, .dumpconf = g_journal_dumpconf, .init = g_journal_init, .fini = g_journal_fini }; static int g_journal_destroy(struct g_journal_softc *sc); static void g_journal_metadata_update(struct g_journal_softc *sc); static void g_journal_start_switcher(struct g_class *mp); static void g_journal_stop_switcher(void); static void g_journal_switch_wait(struct g_journal_softc *sc); #define GJ_SWITCHER_WORKING 0 #define GJ_SWITCHER_DIE 1 #define GJ_SWITCHER_DIED 2 static struct proc *g_journal_switcher_proc = NULL; static int g_journal_switcher_state = GJ_SWITCHER_WORKING; static int g_journal_switcher_wokenup = 0; static int g_journal_sync_requested = 0; #ifdef GJ_MEMDEBUG struct meminfo { size_t mi_size; struct stack mi_stack; }; #endif /* * We use our own malloc/realloc/free funtions, so we can collect statistics * and force journal switch when we're running out of cache. 
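 *
 * A simplified sketch of what gj_malloc() below does (a paraphrase, not a
 * verbatim excerpt of the code):
 *
 *	mtx_lock(&g_journal_cache_mtx);
 *	if (g_journal_cache_used + size > g_journal_cache_low)
 *		wake up the switcher thread	(time to switch journals)
 *	if ((flags & M_NOWAIT) && g_journal_cache_used + size > limit)
 *		return (NULL)			(cache is full, refuse)
 *	g_journal_cache_used += size;
 *	mtx_unlock(&g_journal_cache_mtx);
 *	allocate the buffer with M_WAITOK and return it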
*/ static void * gj_malloc(size_t size, int flags) { void *p; #ifdef GJ_MEMDEBUG struct meminfo *mi; #endif mtx_lock(&g_journal_cache_mtx); if (g_journal_cache_limit > 0 && !g_journal_switcher_wokenup && g_journal_cache_used + size > g_journal_cache_low) { GJ_DEBUG(1, "No cache, waking up the switcher."); g_journal_switcher_wokenup = 1; wakeup(&g_journal_switcher_state); } if ((flags & M_NOWAIT) && g_journal_cache_limit > 0 && g_journal_cache_used + size > g_journal_cache_limit) { mtx_unlock(&g_journal_cache_mtx); g_journal_cache_alloc_failures++; return (NULL); } g_journal_cache_used += size; mtx_unlock(&g_journal_cache_mtx); flags &= ~M_NOWAIT; #ifndef GJ_MEMDEBUG p = malloc(size, M_JOURNAL, flags | M_WAITOK); #else mi = malloc(sizeof(*mi) + size, M_JOURNAL, flags | M_WAITOK); p = (u_char *)mi + sizeof(*mi); mi->mi_size = size; stack_save(&mi->mi_stack); #endif return (p); } static void gj_free(void *p, size_t size) { #ifdef GJ_MEMDEBUG struct meminfo *mi; #endif KASSERT(p != NULL, ("p=NULL")); KASSERT(size > 0, ("size=0")); mtx_lock(&g_journal_cache_mtx); KASSERT(g_journal_cache_used >= size, ("Freeing too much?")); g_journal_cache_used -= size; mtx_unlock(&g_journal_cache_mtx); #ifdef GJ_MEMDEBUG mi = p = (void *)((u_char *)p - sizeof(*mi)); if (mi->mi_size != size) { printf("GJOURNAL: Size mismatch! %zu != %zu\n", size, mi->mi_size); printf("GJOURNAL: Alloc backtrace:\n"); stack_print(&mi->mi_stack); printf("GJOURNAL: Free backtrace:\n"); kdb_backtrace(); } #endif free(p, M_JOURNAL); } static void * gj_realloc(void *p, size_t size, size_t oldsize) { void *np; #ifndef GJ_MEMDEBUG mtx_lock(&g_journal_cache_mtx); g_journal_cache_used -= oldsize; g_journal_cache_used += size; mtx_unlock(&g_journal_cache_mtx); np = realloc(p, size, M_JOURNAL, M_WAITOK); #else np = gj_malloc(size, M_WAITOK); bcopy(p, np, MIN(oldsize, size)); gj_free(p, oldsize); #endif return (np); } static void g_journal_check_overflow(struct g_journal_softc *sc) { off_t length, used; if ((sc->sc_active.jj_offset < sc->sc_inactive.jj_offset && sc->sc_journal_offset >= sc->sc_inactive.jj_offset) || (sc->sc_active.jj_offset > sc->sc_inactive.jj_offset && sc->sc_journal_offset >= sc->sc_inactive.jj_offset && sc->sc_journal_offset < sc->sc_active.jj_offset)) { panic("Journal overflow " "(id = %u joffset=%jd active=%jd inactive=%jd)", (unsigned)sc->sc_id, (intmax_t)sc->sc_journal_offset, (intmax_t)sc->sc_active.jj_offset, (intmax_t)sc->sc_inactive.jj_offset); } if (sc->sc_active.jj_offset < sc->sc_inactive.jj_offset) { length = sc->sc_inactive.jj_offset - sc->sc_active.jj_offset; used = sc->sc_journal_offset - sc->sc_active.jj_offset; } else { length = sc->sc_jend - sc->sc_active.jj_offset; length += sc->sc_inactive.jj_offset - sc->sc_jstart; if (sc->sc_journal_offset >= sc->sc_active.jj_offset) used = sc->sc_journal_offset - sc->sc_active.jj_offset; else { used = sc->sc_jend - sc->sc_active.jj_offset; used += sc->sc_journal_offset - sc->sc_jstart; } } /* Already woken up? */ if (g_journal_switcher_wokenup) return; /* * If the active journal takes more than g_journal_force_switch precent * of free journal space, we force journal switch. 
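 *
 * For example, with the default g_journal_force_switch of 70, a journal
 * area of length 1000MB with 750MB used gives (used * 100) / length == 75,
 * which is above 70, so the switcher thread is woken up.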
*/ KASSERT(length > 0, ("length=%jd used=%jd active=%jd inactive=%jd joffset=%jd", (intmax_t)length, (intmax_t)used, (intmax_t)sc->sc_active.jj_offset, (intmax_t)sc->sc_inactive.jj_offset, (intmax_t)sc->sc_journal_offset)); if ((used * 100) / length > g_journal_force_switch) { g_journal_stats_journal_full++; GJ_DEBUG(1, "Journal %s %jd%% full, forcing journal switch.", sc->sc_name, (used * 100) / length); mtx_lock(&g_journal_cache_mtx); g_journal_switcher_wokenup = 1; wakeup(&g_journal_switcher_state); mtx_unlock(&g_journal_cache_mtx); } } static void g_journal_orphan(struct g_consumer *cp) { struct g_journal_softc *sc; char name[256]; int error; g_topology_assert(); sc = cp->geom->softc; strlcpy(name, cp->provider->name, sizeof(name)); GJ_DEBUG(0, "Lost provider %s.", name); if (sc == NULL) return; error = g_journal_destroy(sc); if (error == 0) GJ_DEBUG(0, "Journal %s destroyed.", name); else { GJ_DEBUG(0, "Cannot destroy journal %s (error=%d). " "Destroy it manually after last close.", sc->sc_name, error); } } static int g_journal_access(struct g_provider *pp, int acr, int acw, int ace) { struct g_journal_softc *sc; int dcr, dcw, dce; g_topology_assert(); GJ_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, acw, ace); dcr = pp->acr + acr; dcw = pp->acw + acw; dce = pp->ace + ace; sc = pp->geom->softc; if (sc == NULL || (sc->sc_flags & GJF_DEVICE_DESTROY)) { if (acr <= 0 && acw <= 0 && ace <= 0) return (0); else return (ENXIO); } if (pp->acw == 0 && dcw > 0) { GJ_DEBUG(1, "Marking %s as dirty.", sc->sc_name); sc->sc_flags &= ~GJF_DEVICE_CLEAN; g_topology_unlock(); g_journal_metadata_update(sc); g_topology_lock(); } /* else if (pp->acw == 0 && dcw > 0 && JEMPTY(sc)) { GJ_DEBUG(1, "Marking %s as clean.", sc->sc_name); sc->sc_flags |= GJF_DEVICE_CLEAN; g_topology_unlock(); g_journal_metadata_update(sc); g_topology_lock(); } */ return (0); } static void g_journal_header_encode(struct g_journal_header *hdr, u_char *data) { bcopy(GJ_HEADER_MAGIC, data, sizeof(GJ_HEADER_MAGIC)); data += sizeof(GJ_HEADER_MAGIC); le32enc(data, hdr->jh_journal_id); data += 4; le32enc(data, hdr->jh_journal_next_id); } static int g_journal_header_decode(const u_char *data, struct g_journal_header *hdr) { bcopy(data, hdr->jh_magic, sizeof(hdr->jh_magic)); data += sizeof(hdr->jh_magic); if (bcmp(hdr->jh_magic, GJ_HEADER_MAGIC, sizeof(GJ_HEADER_MAGIC)) != 0) return (EINVAL); hdr->jh_journal_id = le32dec(data); data += 4; hdr->jh_journal_next_id = le32dec(data); return (0); } static void g_journal_flush_cache(struct g_journal_softc *sc) { struct bintime bt; int error; if (sc->sc_bio_flush == 0) return; GJ_TIMER_START(1, &bt); if (sc->sc_bio_flush & GJ_FLUSH_JOURNAL) { error = g_io_flush(sc->sc_jconsumer); GJ_DEBUG(error == 0 ? 2 : 0, "Flush cache of %s: error=%d.", sc->sc_jconsumer->provider->name, error); } if (sc->sc_bio_flush & GJ_FLUSH_DATA) { /* * TODO: This could be called in parallel with the * previous call. */ error = g_io_flush(sc->sc_dconsumer); GJ_DEBUG(error == 0 ? 
2 : 0, "Flush cache of %s: error=%d.", sc->sc_dconsumer->provider->name, error); } GJ_TIMER_STOP(1, &bt, "Cache flush time"); } static int g_journal_write_header(struct g_journal_softc *sc) { struct g_journal_header hdr; struct g_consumer *cp; u_char *buf; int error; cp = sc->sc_jconsumer; buf = gj_malloc(cp->provider->sectorsize, M_WAITOK); strlcpy(hdr.jh_magic, GJ_HEADER_MAGIC, sizeof(hdr.jh_magic)); hdr.jh_journal_id = sc->sc_journal_id; hdr.jh_journal_next_id = sc->sc_journal_next_id; g_journal_header_encode(&hdr, buf); error = g_write_data(cp, sc->sc_journal_offset, buf, cp->provider->sectorsize); /* if (error == 0) */ sc->sc_journal_offset += cp->provider->sectorsize; gj_free(buf, cp->provider->sectorsize); return (error); } /* * Every journal record has a header and data following it. * Functions below are used to decode the header before storing it to * little endian and to encode it after reading to system endianness. */ static void g_journal_record_header_encode(struct g_journal_record_header *hdr, u_char *data) { struct g_journal_entry *ent; u_int i; bcopy(GJ_RECORD_HEADER_MAGIC, data, sizeof(GJ_RECORD_HEADER_MAGIC)); data += sizeof(GJ_RECORD_HEADER_MAGIC); le32enc(data, hdr->jrh_journal_id); data += 8; le16enc(data, hdr->jrh_nentries); data += 2; bcopy(hdr->jrh_sum, data, sizeof(hdr->jrh_sum)); data += 8; for (i = 0; i < hdr->jrh_nentries; i++) { ent = &hdr->jrh_entries[i]; le64enc(data, ent->je_joffset); data += 8; le64enc(data, ent->je_offset); data += 8; le64enc(data, ent->je_length); data += 8; } } static int g_journal_record_header_decode(const u_char *data, struct g_journal_record_header *hdr) { struct g_journal_entry *ent; u_int i; bcopy(data, hdr->jrh_magic, sizeof(hdr->jrh_magic)); data += sizeof(hdr->jrh_magic); if (strcmp(hdr->jrh_magic, GJ_RECORD_HEADER_MAGIC) != 0) return (EINVAL); hdr->jrh_journal_id = le32dec(data); data += 8; hdr->jrh_nentries = le16dec(data); data += 2; if (hdr->jrh_nentries > GJ_RECORD_HEADER_NENTRIES) return (EINVAL); bcopy(data, hdr->jrh_sum, sizeof(hdr->jrh_sum)); data += 8; for (i = 0; i < hdr->jrh_nentries; i++) { ent = &hdr->jrh_entries[i]; ent->je_joffset = le64dec(data); data += 8; ent->je_offset = le64dec(data); data += 8; ent->je_length = le64dec(data); data += 8; } return (0); } /* * Function reads metadata from a provider (via the given consumer), decodes * it to system endianness and verifies its correctness. */ static int g_journal_metadata_read(struct g_consumer *cp, struct g_journal_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); /* Metadata is stored in last sector. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { GJ_DEBUG(1, "Cannot read metadata from %s (error=%d).", cp->provider->name, error); return (error); } /* Decode metadata. */ error = journal_metadata_decode(buf, md); g_free(buf); /* Is this is gjournal provider at all? */ if (strcmp(md->md_magic, G_JOURNAL_MAGIC) != 0) return (EINVAL); /* * Are we able to handle this version of metadata? * We only maintain backward compatibility. */ if (md->md_version > G_JOURNAL_VERSION) { GJ_DEBUG(0, "Kernel module is too old to handle metadata from %s.", cp->provider->name); return (EINVAL); } /* Is checksum correct? 
*/ if (error != 0) { GJ_DEBUG(0, "MD5 metadata hash mismatch for provider %s.", cp->provider->name); return (error); } return (0); } /* * Two functions below are responsible for updating metadata. * Only metadata on the data provider is updated (we need to update * information about active journal in there). */ static void g_journal_metadata_done(struct bio *bp) { /* * There is not much we can do on error except informing about it. */ if (bp->bio_error != 0) { GJ_LOGREQ(0, bp, "Cannot update metadata (error=%d).", bp->bio_error); } else { GJ_LOGREQ(2, bp, "Metadata updated."); } gj_free(bp->bio_data, bp->bio_length); g_destroy_bio(bp); } static void g_journal_metadata_update(struct g_journal_softc *sc) { struct g_journal_metadata md; struct g_consumer *cp; struct bio *bp; u_char *sector; cp = sc->sc_dconsumer; sector = gj_malloc(cp->provider->sectorsize, M_WAITOK); strlcpy(md.md_magic, G_JOURNAL_MAGIC, sizeof(md.md_magic)); md.md_version = G_JOURNAL_VERSION; md.md_id = sc->sc_id; md.md_type = sc->sc_orig_type; md.md_jstart = sc->sc_jstart; md.md_jend = sc->sc_jend; md.md_joffset = sc->sc_inactive.jj_offset; md.md_jid = sc->sc_journal_previous_id; md.md_flags = 0; if (sc->sc_flags & GJF_DEVICE_CLEAN) md.md_flags |= GJ_FLAG_CLEAN; if (sc->sc_flags & GJF_DEVICE_HARDCODED) strlcpy(md.md_provider, sc->sc_name, sizeof(md.md_provider)); else bzero(md.md_provider, sizeof(md.md_provider)); md.md_provsize = cp->provider->mediasize; journal_metadata_encode(&md, sector); /* * Flush the cache, so we know all data are on disk. * We write here informations like "journal is consistent", so we need * to be sure it is. Without BIO_FLUSH here, we can end up in situation * where metadata is stored on disk, but not all data. */ g_journal_flush_cache(sc); bp = g_alloc_bio(); bp->bio_offset = cp->provider->mediasize - cp->provider->sectorsize; bp->bio_length = cp->provider->sectorsize; bp->bio_data = sector; bp->bio_cmd = BIO_WRITE; if (!(sc->sc_flags & GJF_DEVICE_DESTROY)) { bp->bio_done = g_journal_metadata_done; g_io_request(bp, cp); } else { bp->bio_done = NULL; g_io_request(bp, cp); biowait(bp, "gjmdu"); g_journal_metadata_done(bp); } /* * Be sure metadata reached the disk. */ g_journal_flush_cache(sc); } /* * This is where the I/O request comes from the GEOM. 
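 *
 * In short: BIO_READ and BIO_WRITE are queued on sc_regular_queue for the
 * worker thread, the "GJOURNAL::provider" BIO_GETATTR is answered in place,
 * and everything else (including BIO_DELETE) gets EOPNOTSUPP.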
*/ static void g_journal_start(struct bio *bp) { struct g_journal_softc *sc; sc = bp->bio_to->geom->softc; GJ_LOGREQ(3, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: mtx_lock(&sc->sc_mtx); bioq_insert_tail(&sc->sc_regular_queue, bp); wakeup(sc); mtx_unlock(&sc->sc_mtx); return; case BIO_GETATTR: if (strcmp(bp->bio_attribute, "GJOURNAL::provider") == 0) { strlcpy(bp->bio_data, bp->bio_to->name, bp->bio_length); bp->bio_completed = strlen(bp->bio_to->name) + 1; g_io_deliver(bp, 0); return; } /* FALLTHROUGH */ case BIO_DELETE: default: g_io_deliver(bp, EOPNOTSUPP); return; } } static void g_journal_std_done(struct bio *bp) { struct g_journal_softc *sc; sc = bp->bio_from->geom->softc; mtx_lock(&sc->sc_mtx); bioq_insert_tail(&sc->sc_back_queue, bp); wakeup(sc); mtx_unlock(&sc->sc_mtx); } static struct bio * g_journal_new_bio(off_t start, off_t end, off_t joffset, u_char *data, int flags) { struct bio *bp; bp = g_alloc_bio(); bp->bio_offset = start; bp->bio_joffset = joffset; bp->bio_length = end - start; bp->bio_cmd = BIO_WRITE; bp->bio_done = g_journal_std_done; if (data == NULL) bp->bio_data = NULL; else { bp->bio_data = gj_malloc(bp->bio_length, flags); if (bp->bio_data != NULL) bcopy(data, bp->bio_data, bp->bio_length); } return (bp); } #define g_journal_insert_bio(head, bp, flags) \ g_journal_insert((head), (bp)->bio_offset, \ (bp)->bio_offset + (bp)->bio_length, (bp)->bio_joffset, \ (bp)->bio_data, flags) /* * The function below does a lot more than just inserting bio to the queue. * It keeps the queue sorted by offset and ensures that there are no doubled * data (it combines bios where ranges overlap). * * The function returns the number of bios inserted (as bio can be splitted). */ static int g_journal_insert(struct bio **head, off_t nstart, off_t nend, off_t joffset, u_char *data, int flags) { struct bio *nbp, *cbp, *pbp; off_t cstart, cend; u_char *tmpdata; int n; GJ_DEBUG(3, "INSERT(%p): (%jd, %jd, %jd)", *head, nstart, nend, joffset); n = 0; pbp = NULL; GJQ_FOREACH(*head, cbp) { cstart = cbp->bio_offset; cend = cbp->bio_offset + cbp->bio_length; if (nstart >= cend) { /* * +-------------+ * | | * | current | +-------------+ * | bio | | | * | | | new | * +-------------+ | bio | * | | * +-------------+ */ GJ_DEBUG(3, "INSERT(%p): 1", *head); } else if (nend <= cstart) { /* * +-------------+ * | | * +-------------+ | current | * | | | bio | * | new | | | * | bio | +-------------+ * | | * +-------------+ */ nbp = g_journal_new_bio(nstart, nend, joffset, data, flags); if (pbp == NULL) *head = nbp; else pbp->bio_next = nbp; nbp->bio_next = cbp; n++; GJ_DEBUG(3, "INSERT(%p): 2 (nbp=%p pbp=%p)", *head, nbp, pbp); goto end; } else if (nstart <= cstart && nend >= cend) { /* * +-------------+ +-------------+ * | current bio | | current bio | * +---+-------------+---+ +-------------+---+ * | | | | | | | * | | | | | | | * | +-------------+ | +-------------+ | * | new bio | | new bio | * +---------------------+ +-----------------+ * * +-------------+ +-------------+ * | current bio | | current bio | * +---+-------------+ +-------------+ * | | | | | * | | | | | * | +-------------+ +-------------+ * | new bio | | new bio | * +-----------------+ +-------------+ */ g_journal_stats_bytes_skipped += cbp->bio_length; cbp->bio_offset = nstart; cbp->bio_joffset = joffset; cbp->bio_length = cend - nstart; if (cbp->bio_data != NULL) { gj_free(cbp->bio_data, cend - cstart); cbp->bio_data = NULL; } if (data != NULL) { cbp->bio_data = gj_malloc(cbp->bio_length, flags); if 
(cbp->bio_data != NULL) { bcopy(data, cbp->bio_data, cbp->bio_length); } data += cend - nstart; } joffset += cend - nstart; nstart = cend; GJ_DEBUG(3, "INSERT(%p): 3 (cbp=%p)", *head, cbp); } else if (nstart > cstart && nend >= cend) { /* * +-----------------+ +-------------+ * | current bio | | current bio | * | +-------------+ | +---------+---+ * | | | | | | | * | | | | | | | * +---+-------------+ +---+---------+ | * | new bio | | new bio | * +-------------+ +-------------+ */ g_journal_stats_bytes_skipped += cend - nstart; nbp = g_journal_new_bio(nstart, cend, joffset, data, flags); nbp->bio_next = cbp->bio_next; cbp->bio_next = nbp; cbp->bio_length = nstart - cstart; if (cbp->bio_data != NULL) { cbp->bio_data = gj_realloc(cbp->bio_data, cbp->bio_length, cend - cstart); } if (data != NULL) data += cend - nstart; joffset += cend - nstart; nstart = cend; n++; GJ_DEBUG(3, "INSERT(%p): 4 (cbp=%p)", *head, cbp); } else if (nstart > cstart && nend < cend) { /* * +---------------------+ * | current bio | * | +-------------+ | * | | | | * | | | | * +---+-------------+---+ * | new bio | * +-------------+ */ g_journal_stats_bytes_skipped += nend - nstart; nbp = g_journal_new_bio(nstart, nend, joffset, data, flags); nbp->bio_next = cbp->bio_next; cbp->bio_next = nbp; if (cbp->bio_data == NULL) tmpdata = NULL; else tmpdata = cbp->bio_data + nend - cstart; nbp = g_journal_new_bio(nend, cend, cbp->bio_joffset + nend - cstart, tmpdata, flags); nbp->bio_next = ((struct bio *)cbp->bio_next)->bio_next; ((struct bio *)cbp->bio_next)->bio_next = nbp; cbp->bio_length = nstart - cstart; if (cbp->bio_data != NULL) { cbp->bio_data = gj_realloc(cbp->bio_data, cbp->bio_length, cend - cstart); } n += 2; GJ_DEBUG(3, "INSERT(%p): 5 (cbp=%p)", *head, cbp); goto end; } else if (nstart <= cstart && nend < cend) { /* * +-----------------+ +-------------+ * | current bio | | current bio | * +-------------+ | +---+---------+ | * | | | | | | | * | | | | | | | * +-------------+---+ | +---------+---+ * | new bio | | new bio | * +-------------+ +-------------+ */ g_journal_stats_bytes_skipped += nend - nstart; nbp = g_journal_new_bio(nstart, nend, joffset, data, flags); if (pbp == NULL) *head = nbp; else pbp->bio_next = nbp; nbp->bio_next = cbp; cbp->bio_offset = nend; cbp->bio_length = cend - nend; cbp->bio_joffset += nend - cstart; tmpdata = cbp->bio_data; if (tmpdata != NULL) { cbp->bio_data = gj_malloc(cbp->bio_length, flags); if (cbp->bio_data != NULL) { bcopy(tmpdata + nend - cstart, cbp->bio_data, cbp->bio_length); } gj_free(tmpdata, cend - cstart); } n++; GJ_DEBUG(3, "INSERT(%p): 6 (cbp=%p)", *head, cbp); goto end; } if (nstart == nend) goto end; pbp = cbp; } nbp = g_journal_new_bio(nstart, nend, joffset, data, flags); if (pbp == NULL) *head = nbp; else pbp->bio_next = nbp; nbp->bio_next = NULL; n++; GJ_DEBUG(3, "INSERT(%p): 8 (nbp=%p pbp=%p)", *head, nbp, pbp); end: if (g_journal_debug >= 3) { GJQ_FOREACH(*head, cbp) { GJ_DEBUG(3, "ELEMENT: %p (%jd, %jd, %jd, %p)", cbp, (intmax_t)cbp->bio_offset, (intmax_t)cbp->bio_length, (intmax_t)cbp->bio_joffset, cbp->bio_data); } GJ_DEBUG(3, "INSERT(%p): DONE %d", *head, n); } return (n); } /* * The function combines neighbour bios trying to squeeze as much data as * possible into one bio. * * The function returns the number of bios combined (negative value). */ static int g_journal_optimize(struct bio *head) { struct bio *cbp, *pbp; int n; n = 0; pbp = NULL; GJQ_FOREACH(head, cbp) { /* Skip bios which has to be read first. 
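 * (A NULL bio_data below means the payload is no longer cached in memory
 *  and would have to be read back from the journal before it could be
 *  combined, so such bios are skipped.)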
*/ if (cbp->bio_data == NULL) { pbp = NULL; continue; } /* There is no previous bio yet. */ if (pbp == NULL) { pbp = cbp; continue; } /* Is this a neighbour bio? */ if (pbp->bio_offset + pbp->bio_length != cbp->bio_offset) { /* Be sure that bios queue is sorted. */ KASSERT(pbp->bio_offset + pbp->bio_length < cbp->bio_offset, ("poffset=%jd plength=%jd coffset=%jd", (intmax_t)pbp->bio_offset, (intmax_t)pbp->bio_length, (intmax_t)cbp->bio_offset)); pbp = cbp; continue; } /* Be sure we don't end up with too big bio. */ if (pbp->bio_length + cbp->bio_length > MAXPHYS) { pbp = cbp; continue; } /* Ok, we can join bios. */ GJ_LOGREQ(4, pbp, "Join: "); GJ_LOGREQ(4, cbp, "and: "); pbp->bio_data = gj_realloc(pbp->bio_data, pbp->bio_length + cbp->bio_length, pbp->bio_length); bcopy(cbp->bio_data, pbp->bio_data + pbp->bio_length, cbp->bio_length); gj_free(cbp->bio_data, cbp->bio_length); pbp->bio_length += cbp->bio_length; pbp->bio_next = cbp->bio_next; g_destroy_bio(cbp); cbp = pbp; g_journal_stats_combined_ios++; n--; GJ_LOGREQ(4, pbp, "Got: "); } return (n); } /* * TODO: Update comment. * These are functions responsible for copying one portion of data from journal * to the destination provider. * The order goes like this: * 1. Read the header, which contains informations about data blocks * following it. * 2. Read the data blocks from the journal. * 3. Write the data blocks on the data provider. * * g_journal_copy_start() * g_journal_copy_done() - got finished write request, logs potential errors. */ /* * When there is no data in cache, this function is used to read it. */ static void g_journal_read_first(struct g_journal_softc *sc, struct bio *bp) { struct bio *cbp; /* * We were short in memory, so data was freed. * In that case we need to read it back from journal. */ cbp = g_alloc_bio(); cbp->bio_cflags = bp->bio_cflags; cbp->bio_parent = bp; cbp->bio_offset = bp->bio_joffset; cbp->bio_length = bp->bio_length; cbp->bio_data = gj_malloc(bp->bio_length, M_WAITOK); cbp->bio_cmd = BIO_READ; cbp->bio_done = g_journal_std_done; GJ_LOGREQ(4, cbp, "READ FIRST"); g_io_request(cbp, sc->sc_jconsumer); g_journal_cache_misses++; } static void g_journal_copy_send(struct g_journal_softc *sc) { struct bio *bioq, *bp, *lbp; bioq = lbp = NULL; mtx_lock(&sc->sc_mtx); for (; sc->sc_copy_in_progress < g_journal_parallel_copies;) { bp = GJQ_FIRST(sc->sc_inactive.jj_queue); if (bp == NULL) break; GJQ_REMOVE(sc->sc_inactive.jj_queue, bp); sc->sc_copy_in_progress++; GJQ_INSERT_AFTER(bioq, bp, lbp); lbp = bp; } mtx_unlock(&sc->sc_mtx); if (g_journal_do_optimize) sc->sc_copy_in_progress += g_journal_optimize(bioq); while ((bp = GJQ_FIRST(bioq)) != NULL) { GJQ_REMOVE(bioq, bp); GJQ_INSERT_HEAD(sc->sc_copy_queue, bp); bp->bio_cflags = GJ_BIO_COPY; if (bp->bio_data == NULL) g_journal_read_first(sc, bp); else { bp->bio_joffset = 0; GJ_LOGREQ(4, bp, "SEND"); g_io_request(bp, sc->sc_dconsumer); } } } static void g_journal_copy_start(struct g_journal_softc *sc) { /* * Remember in metadata that we're starting to copy journaled data * to the data provider. * In case of power failure, we will copy these data once again on boot. */ if (!sc->sc_journal_copying) { sc->sc_journal_copying = 1; GJ_DEBUG(1, "Starting copy of journal."); g_journal_metadata_update(sc); } g_journal_copy_send(sc); } /* * Data block has been read from the journal provider. 
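 *
 * On success the freshly read buffer is handed over to the parent WRITE
 * bio, which is then sent to the data consumer; on error both bios are
 * destroyed and the copy-in-progress counter is decremented.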
*/ static int g_journal_copy_read_done(struct bio *bp) { struct g_journal_softc *sc; struct g_consumer *cp; struct bio *pbp; KASSERT(bp->bio_cflags == GJ_BIO_COPY, ("Invalid bio (%d != %d).", bp->bio_cflags, GJ_BIO_COPY)); sc = bp->bio_from->geom->softc; pbp = bp->bio_parent; if (bp->bio_error != 0) { GJ_DEBUG(0, "Error while reading data from %s (error=%d).", bp->bio_to->name, bp->bio_error); /* * We will not be able to deliver WRITE request as well. */ gj_free(bp->bio_data, bp->bio_length); g_destroy_bio(pbp); g_destroy_bio(bp); sc->sc_copy_in_progress--; return (1); } pbp->bio_data = bp->bio_data; cp = sc->sc_dconsumer; g_io_request(pbp, cp); GJ_LOGREQ(4, bp, "READ DONE"); g_destroy_bio(bp); return (0); } /* * Data block has been written to the data provider. */ static void g_journal_copy_write_done(struct bio *bp) { struct g_journal_softc *sc; KASSERT(bp->bio_cflags == GJ_BIO_COPY, ("Invalid bio (%d != %d).", bp->bio_cflags, GJ_BIO_COPY)); sc = bp->bio_from->geom->softc; sc->sc_copy_in_progress--; if (bp->bio_error != 0) { GJ_LOGREQ(0, bp, "[copy] Error while writing data (error=%d)", bp->bio_error); } GJQ_REMOVE(sc->sc_copy_queue, bp); gj_free(bp->bio_data, bp->bio_length); GJ_LOGREQ(4, bp, "DONE"); g_destroy_bio(bp); if (sc->sc_copy_in_progress == 0) { /* * This was the last write request for this journal. */ GJ_DEBUG(1, "Data has been copied."); sc->sc_journal_copying = 0; } } static void g_journal_flush_done(struct bio *bp); /* * Flush one record onto active journal provider. */ static void g_journal_flush(struct g_journal_softc *sc) { struct g_journal_record_header hdr; struct g_journal_entry *ent; struct g_provider *pp; struct bio **bioq; struct bio *bp, *fbp, *pbp; off_t joffset; u_char *data, hash[16]; MD5_CTX ctx; u_int i; if (sc->sc_current_count == 0) return; pp = sc->sc_jprovider; GJ_VALIDATE_OFFSET(sc->sc_journal_offset, sc); joffset = sc->sc_journal_offset; GJ_DEBUG(2, "Storing %d journal entries on %s at %jd.", sc->sc_current_count, pp->name, (intmax_t)joffset); /* * Store 'journal id', so we know to which journal this record belongs. */ hdr.jrh_journal_id = sc->sc_journal_id; /* Could be less than g_journal_record_entries if called due timeout. */ hdr.jrh_nentries = MIN(sc->sc_current_count, g_journal_record_entries); strlcpy(hdr.jrh_magic, GJ_RECORD_HEADER_MAGIC, sizeof(hdr.jrh_magic)); bioq = &sc->sc_active.jj_queue; GJQ_LAST(sc->sc_flush_queue, pbp); fbp = g_alloc_bio(); fbp->bio_parent = NULL; fbp->bio_cflags = GJ_BIO_JOURNAL; fbp->bio_offset = -1; fbp->bio_joffset = joffset; fbp->bio_length = pp->sectorsize; fbp->bio_cmd = BIO_WRITE; fbp->bio_done = g_journal_std_done; GJQ_INSERT_AFTER(sc->sc_flush_queue, fbp, pbp); pbp = fbp; fbp->bio_to = pp; GJ_LOGREQ(4, fbp, "FLUSH_OUT"); joffset += pp->sectorsize; sc->sc_flush_count++; if (sc->sc_flags & GJF_DEVICE_CHECKSUM) MD5Init(&ctx); for (i = 0; i < hdr.jrh_nentries; i++) { bp = sc->sc_current_queue; KASSERT(bp != NULL, ("NULL bp")); bp->bio_to = pp; GJ_LOGREQ(4, bp, "FLUSHED"); sc->sc_current_queue = bp->bio_next; bp->bio_next = NULL; sc->sc_current_count--; /* Add to the header. 
*/ ent = &hdr.jrh_entries[i]; ent->je_offset = bp->bio_offset; ent->je_joffset = joffset; ent->je_length = bp->bio_length; data = bp->bio_data; if (sc->sc_flags & GJF_DEVICE_CHECKSUM) MD5Update(&ctx, data, ent->je_length); g_reset_bio(bp); bp->bio_cflags = GJ_BIO_JOURNAL; bp->bio_offset = ent->je_offset; bp->bio_joffset = ent->je_joffset; bp->bio_length = ent->je_length; bp->bio_data = data; bp->bio_cmd = BIO_WRITE; bp->bio_done = g_journal_std_done; GJQ_INSERT_AFTER(sc->sc_flush_queue, bp, pbp); pbp = bp; bp->bio_to = pp; GJ_LOGREQ(4, bp, "FLUSH_OUT"); joffset += bp->bio_length; sc->sc_flush_count++; /* * Add request to the active sc_journal_queue queue. * This is our cache. After journal switch we don't have to * read the data from the inactive journal, because we keep * it in memory. */ g_journal_insert(bioq, ent->je_offset, ent->je_offset + ent->je_length, ent->je_joffset, data, M_NOWAIT); } /* * After all requests, store valid header. */ data = gj_malloc(pp->sectorsize, M_WAITOK); if (sc->sc_flags & GJF_DEVICE_CHECKSUM) { MD5Final(hash, &ctx); bcopy(hash, hdr.jrh_sum, sizeof(hdr.jrh_sum)); } g_journal_record_header_encode(&hdr, data); fbp->bio_data = data; sc->sc_journal_offset = joffset; g_journal_check_overflow(sc); } /* * Flush request finished. */ static void g_journal_flush_done(struct bio *bp) { struct g_journal_softc *sc; struct g_consumer *cp; KASSERT((bp->bio_cflags & GJ_BIO_MASK) == GJ_BIO_JOURNAL, ("Invalid bio (%d != %d).", bp->bio_cflags, GJ_BIO_JOURNAL)); cp = bp->bio_from; sc = cp->geom->softc; sc->sc_flush_in_progress--; if (bp->bio_error != 0) { GJ_LOGREQ(0, bp, "[flush] Error while writing data (error=%d)", bp->bio_error); } gj_free(bp->bio_data, bp->bio_length); GJ_LOGREQ(4, bp, "DONE"); g_destroy_bio(bp); } static void g_journal_release_delayed(struct g_journal_softc *sc); static void g_journal_flush_send(struct g_journal_softc *sc) { struct g_consumer *cp; struct bio *bioq, *bp, *lbp; cp = sc->sc_jconsumer; bioq = lbp = NULL; while (sc->sc_flush_in_progress < g_journal_parallel_flushes) { /* Send one flush requests to the active journal. */ bp = GJQ_FIRST(sc->sc_flush_queue); if (bp != NULL) { GJQ_REMOVE(sc->sc_flush_queue, bp); sc->sc_flush_count--; bp->bio_offset = bp->bio_joffset; bp->bio_joffset = 0; sc->sc_flush_in_progress++; GJQ_INSERT_AFTER(bioq, bp, lbp); lbp = bp; } /* Try to release delayed requests. */ g_journal_release_delayed(sc); /* If there are no requests to flush, leave. */ if (GJQ_FIRST(sc->sc_flush_queue) == NULL) break; } if (g_journal_do_optimize) sc->sc_flush_in_progress += g_journal_optimize(bioq); while ((bp = GJQ_FIRST(bioq)) != NULL) { GJQ_REMOVE(bioq, bp); GJ_LOGREQ(3, bp, "Flush request send"); g_io_request(bp, cp); } } static void g_journal_add_current(struct g_journal_softc *sc, struct bio *bp) { int n; GJ_LOGREQ(4, bp, "CURRENT %d", sc->sc_current_count); n = g_journal_insert_bio(&sc->sc_current_queue, bp, M_WAITOK); sc->sc_current_count += n; n = g_journal_optimize(sc->sc_current_queue); sc->sc_current_count += n; /* * For requests which are added to the current queue we deliver * response immediately. */ bp->bio_completed = bp->bio_length; g_io_deliver(bp, 0); if (sc->sc_current_count >= g_journal_record_entries) { /* * Let's flush one record onto active journal provider. */ g_journal_flush(sc); } } static void g_journal_release_delayed(struct g_journal_softc *sc) { struct bio *bp; for (;;) { /* The flush queue is full, exit. 
*/ if (sc->sc_flush_count >= g_journal_accept_immediately) return; bp = bioq_takefirst(&sc->sc_delayed_queue); if (bp == NULL) return; sc->sc_delayed_count--; g_journal_add_current(sc, bp); } } /* * Add I/O request to the current queue. If we have enough requests for one * journal record we flush them onto active journal provider. */ static void g_journal_add_request(struct g_journal_softc *sc, struct bio *bp) { /* * The flush queue is full, we need to delay the request. */ if (sc->sc_delayed_count > 0 || sc->sc_flush_count >= g_journal_accept_immediately) { GJ_LOGREQ(4, bp, "DELAYED"); bioq_insert_tail(&sc->sc_delayed_queue, bp); sc->sc_delayed_count++; return; } KASSERT(TAILQ_EMPTY(&sc->sc_delayed_queue.queue), ("DELAYED queue not empty.")); g_journal_add_current(sc, bp); } static void g_journal_read_done(struct bio *bp); /* * Try to find requested data in cache. */ static struct bio * g_journal_read_find(struct bio *head, int sorted, struct bio *pbp, off_t ostart, off_t oend) { off_t cstart, cend; struct bio *bp; GJQ_FOREACH(head, bp) { if (bp->bio_offset == -1) continue; cstart = MAX(ostart, bp->bio_offset); cend = MIN(oend, bp->bio_offset + bp->bio_length); if (cend <= ostart) continue; else if (cstart >= oend) { if (!sorted) continue; else { bp = NULL; break; } } if (bp->bio_data == NULL) break; GJ_DEBUG(3, "READ(%p): (%jd, %jd) (bp=%p)", head, cstart, cend, bp); bcopy(bp->bio_data + cstart - bp->bio_offset, pbp->bio_data + cstart - pbp->bio_offset, cend - cstart); pbp->bio_completed += cend - cstart; if (pbp->bio_completed == pbp->bio_length) { /* * Cool, the whole request was in cache, deliver happy * message. */ g_io_deliver(pbp, 0); return (pbp); } break; } return (bp); } /* * This function is used for collecting data on read. * The complexity is because parts of the data can be stored in four different * places: * - in memory - the data not yet send to the active journal provider * - in the active journal * - in the inactive journal * - in the data provider */ static void g_journal_read(struct g_journal_softc *sc, struct bio *pbp, off_t ostart, off_t oend) { struct bio *bp, *nbp, *head; off_t cstart, cend; u_int i, sorted = 0; GJ_DEBUG(3, "READ: (%jd, %jd)", ostart, oend); cstart = cend = -1; bp = NULL; head = NULL; for (i = 1; i <= 5; i++) { switch (i) { case 1: /* Not-yet-send data. */ head = sc->sc_current_queue; sorted = 1; break; case 2: /* Skip flush queue as they are also in active queue */ continue; case 3: /* Active journal. */ head = sc->sc_active.jj_queue; sorted = 1; break; case 4: /* Inactive journal. */ /* * XXX: Here could be a race with g_journal_lowmem(). */ head = sc->sc_inactive.jj_queue; sorted = 1; break; case 5: /* In-flight to the data provider. */ head = sc->sc_copy_queue; sorted = 0; break; default: panic("gjournal %s: i=%d", __func__, i); } bp = g_journal_read_find(head, sorted, pbp, ostart, oend); if (bp == pbp) { /* Got the whole request. 
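 * (g_journal_read_find() has already copied every requested byte into pbp
 *  and delivered it with g_io_deliver(), so nothing is left to do here.)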
*/ GJ_DEBUG(2, "Got the whole request from %u.", i); return; } else if (bp != NULL) { cstart = MAX(ostart, bp->bio_offset); cend = MIN(oend, bp->bio_offset + bp->bio_length); GJ_DEBUG(2, "Got part of the request from %u (%jd-%jd).", i, (intmax_t)cstart, (intmax_t)cend); break; } } if (bp != NULL) { if (bp->bio_data == NULL) { nbp = g_duplicate_bio(pbp); nbp->bio_cflags = GJ_BIO_READ; nbp->bio_data = pbp->bio_data + cstart - pbp->bio_offset; nbp->bio_offset = bp->bio_joffset + cstart - bp->bio_offset; nbp->bio_length = cend - cstart; nbp->bio_done = g_journal_read_done; g_io_request(nbp, sc->sc_jconsumer); } /* * If we don't have the whole request yet, call g_journal_read() * recursively. */ if (ostart < cstart) g_journal_read(sc, pbp, ostart, cstart); if (oend > cend) g_journal_read(sc, pbp, cend, oend); } else { /* * No data in memory, no data in journal. * Its time for asking data provider. */ GJ_DEBUG(3, "READ(data): (%jd, %jd)", ostart, oend); nbp = g_duplicate_bio(pbp); nbp->bio_cflags = GJ_BIO_READ; nbp->bio_data = pbp->bio_data + ostart - pbp->bio_offset; nbp->bio_offset = ostart; nbp->bio_length = oend - ostart; nbp->bio_done = g_journal_read_done; g_io_request(nbp, sc->sc_dconsumer); /* We have the whole request, return here. */ return; } } /* * Function responsible for handling finished READ requests. * Actually, g_std_done() could be used here, the only difference is that we * log error. */ static void g_journal_read_done(struct bio *bp) { struct bio *pbp; KASSERT(bp->bio_cflags == GJ_BIO_READ, ("Invalid bio (%d != %d).", bp->bio_cflags, GJ_BIO_READ)); pbp = bp->bio_parent; pbp->bio_inbed++; pbp->bio_completed += bp->bio_length; if (bp->bio_error != 0) { if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; GJ_DEBUG(0, "Error while reading data from %s (error=%d).", bp->bio_to->name, bp->bio_error); } g_destroy_bio(bp); if (pbp->bio_children == pbp->bio_inbed && pbp->bio_completed == pbp->bio_length) { /* We're done. */ g_io_deliver(pbp, 0); } } /* * Deactive current journal and active next one. */ static void g_journal_switch(struct g_journal_softc *sc) { struct g_provider *pp; if (JEMPTY(sc)) { GJ_DEBUG(3, "No need for %s switch.", sc->sc_name); pp = LIST_FIRST(&sc->sc_geom->provider); if (!(sc->sc_flags & GJF_DEVICE_CLEAN) && pp->acw == 0) { sc->sc_flags |= GJF_DEVICE_CLEAN; GJ_DEBUG(1, "Marking %s as clean.", sc->sc_name); g_journal_metadata_update(sc); } } else { GJ_DEBUG(3, "Switching journal %s.", sc->sc_geom->name); pp = sc->sc_jprovider; sc->sc_journal_previous_id = sc->sc_journal_id; sc->sc_journal_id = sc->sc_journal_next_id; sc->sc_journal_next_id = arc4random(); GJ_VALIDATE_OFFSET(sc->sc_journal_offset, sc); g_journal_write_header(sc); sc->sc_inactive.jj_offset = sc->sc_active.jj_offset; sc->sc_inactive.jj_queue = sc->sc_active.jj_queue; sc->sc_active.jj_offset = sc->sc_journal_offset - pp->sectorsize; sc->sc_active.jj_queue = NULL; /* * Switch is done, start copying data from the (now) inactive * journal to the data provider. 
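 * The journal IDs were rotated and a fresh header written above, so new
 * records now go to the new active journal while g_journal_copy_start()
 * drains the old, now inactive, one in the background.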
*/ g_journal_copy_start(sc); } mtx_lock(&sc->sc_mtx); sc->sc_flags &= ~GJF_DEVICE_SWITCH; mtx_unlock(&sc->sc_mtx); } static void g_journal_initialize(struct g_journal_softc *sc) { sc->sc_journal_id = arc4random(); sc->sc_journal_next_id = arc4random(); sc->sc_journal_previous_id = sc->sc_journal_id; sc->sc_journal_offset = sc->sc_jstart; sc->sc_inactive.jj_offset = sc->sc_jstart; g_journal_write_header(sc); sc->sc_active.jj_offset = sc->sc_jstart; } static void g_journal_mark_as_dirty(struct g_journal_softc *sc) { const struct g_journal_desc *desc; int i; GJ_DEBUG(1, "Marking file system %s as dirty.", sc->sc_name); for (i = 0; (desc = g_journal_filesystems[i]) != NULL; i++) desc->jd_dirty(sc->sc_dconsumer); } /* * Function read record header from the given journal. * It is very simlar to g_read_data(9), but it doesn't allocate memory for bio * and data on every call. */ static int g_journal_sync_read(struct g_consumer *cp, struct bio *bp, off_t offset, void *data) { int error; g_reset_bio(bp); bp->bio_cmd = BIO_READ; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = cp->provider->sectorsize; bp->bio_data = data; g_io_request(bp, cp); error = biowait(bp, "gjs_read"); return (error); } #if 0 /* * Function is called when we start the journal device and we detect that * one of the journals was not fully copied. * The purpose of this function is to read all records headers from journal * and placed them in the inactive queue, so we can start journal * synchronization process and the journal provider itself. * Design decision was taken to not synchronize the whole journal here as it * can take too much time. Reading headers only and delaying synchronization * process until after journal provider is started should be the best choice. */ #endif static void g_journal_sync(struct g_journal_softc *sc) { struct g_journal_record_header rhdr; struct g_journal_entry *ent; struct g_journal_header jhdr; struct g_consumer *cp; struct bio *bp, *fbp, *tbp; off_t joffset, offset; u_char *buf, sum[16]; uint64_t id; MD5_CTX ctx; int error, found, i; found = 0; fbp = NULL; cp = sc->sc_jconsumer; bp = g_alloc_bio(); buf = gj_malloc(cp->provider->sectorsize, M_WAITOK); offset = joffset = sc->sc_inactive.jj_offset = sc->sc_journal_offset; GJ_DEBUG(2, "Looking for termination at %jd.", (intmax_t)joffset); /* * Read and decode first journal header. */ error = g_journal_sync_read(cp, bp, offset, buf); if (error != 0) { GJ_DEBUG(0, "Error while reading journal header from %s.", cp->provider->name); goto end; } error = g_journal_header_decode(buf, &jhdr); if (error != 0) { GJ_DEBUG(0, "Cannot decode journal header from %s.", cp->provider->name); goto end; } id = sc->sc_journal_id; if (jhdr.jh_journal_id != sc->sc_journal_id) { GJ_DEBUG(1, "Journal ID mismatch at %jd (0x%08x != 0x%08x).", (intmax_t)offset, (u_int)jhdr.jh_journal_id, (u_int)id); goto end; } offset += cp->provider->sectorsize; id = sc->sc_journal_next_id = jhdr.jh_journal_next_id; for (;;) { /* * If the biggest record won't fit, look for a record header or * journal header from the beginning. */ GJ_VALIDATE_OFFSET(offset, sc); error = g_journal_sync_read(cp, bp, offset, buf); if (error != 0) { /* * Not good. Having an error while reading header * means, that we cannot read next headers and in * consequence we cannot find termination. 
*/ GJ_DEBUG(0, "Error while reading record header from %s.", cp->provider->name); break; } error = g_journal_record_header_decode(buf, &rhdr); if (error != 0) { GJ_DEBUG(2, "Not a record header at %jd (error=%d).", (intmax_t)offset, error); /* * This is not a record header. * If we are lucky, this is next journal header. */ error = g_journal_header_decode(buf, &jhdr); if (error != 0) { GJ_DEBUG(1, "Not a journal header at %jd (error=%d).", (intmax_t)offset, error); /* * Nope, this is not journal header, which * bascially means that journal is not * terminated properly. */ error = ENOENT; break; } /* * Ok. This is header of _some_ journal. Now we need to * verify if this is header of the _next_ journal. */ if (jhdr.jh_journal_id != id) { GJ_DEBUG(1, "Journal ID mismatch at %jd " "(0x%08x != 0x%08x).", (intmax_t)offset, (u_int)jhdr.jh_journal_id, (u_int)id); error = ENOENT; break; } /* Found termination. */ found++; GJ_DEBUG(1, "Found termination at %jd (id=0x%08x).", (intmax_t)offset, (u_int)id); sc->sc_active.jj_offset = offset; sc->sc_journal_offset = offset + cp->provider->sectorsize; sc->sc_journal_id = id; id = sc->sc_journal_next_id = jhdr.jh_journal_next_id; while ((tbp = fbp) != NULL) { fbp = tbp->bio_next; GJ_LOGREQ(3, tbp, "Adding request."); g_journal_insert_bio(&sc->sc_inactive.jj_queue, tbp, M_WAITOK); } /* Skip journal's header. */ offset += cp->provider->sectorsize; continue; } /* Skip record's header. */ offset += cp->provider->sectorsize; /* * Add information about every record entry to the inactive * queue. */ if (sc->sc_flags & GJF_DEVICE_CHECKSUM) MD5Init(&ctx); for (i = 0; i < rhdr.jrh_nentries; i++) { ent = &rhdr.jrh_entries[i]; GJ_DEBUG(3, "Insert entry: %jd %jd.", (intmax_t)ent->je_offset, (intmax_t)ent->je_length); g_journal_insert(&fbp, ent->je_offset, ent->je_offset + ent->je_length, ent->je_joffset, NULL, M_WAITOK); if (sc->sc_flags & GJF_DEVICE_CHECKSUM) { u_char *buf2; /* * TODO: Should use faster function (like * g_journal_sync_read()). */ buf2 = g_read_data(cp, offset, ent->je_length, NULL); if (buf2 == NULL) GJ_DEBUG(0, "Cannot read data at %jd.", (intmax_t)offset); else { MD5Update(&ctx, buf2, ent->je_length); g_free(buf2); } } /* Skip entry's data. */ offset += ent->je_length; } if (sc->sc_flags & GJF_DEVICE_CHECKSUM) { MD5Final(sum, &ctx); if (bcmp(sum, rhdr.jrh_sum, sizeof(rhdr.jrh_sum)) != 0) { GJ_DEBUG(0, "MD5 hash mismatch at %jd!", (intmax_t)offset); } } } end: gj_free(bp->bio_data, cp->provider->sectorsize); g_destroy_bio(bp); /* Remove bios from unterminated journal. */ while ((tbp = fbp) != NULL) { fbp = tbp->bio_next; g_destroy_bio(tbp); } if (found < 1 && joffset > 0) { GJ_DEBUG(0, "Journal on %s is broken/corrupted. Initializing.", sc->sc_name); while ((tbp = sc->sc_inactive.jj_queue) != NULL) { sc->sc_inactive.jj_queue = tbp->bio_next; g_destroy_bio(tbp); } g_journal_initialize(sc); g_journal_mark_as_dirty(sc); } else { GJ_DEBUG(0, "Journal %s consistent.", sc->sc_name); g_journal_copy_start(sc); } } /* * Wait for requests. * If we have requests in the current queue, flush them after 3 seconds from the * last flush. In this way we don't wait forever (or for journal switch) with * storing not full records on journal. */ static void g_journal_wait(struct g_journal_softc *sc, time_t last_write) { int error, timeout; GJ_DEBUG(3, "%s: enter", __func__); if (sc->sc_current_count == 0) { if (g_journal_debug < 2) msleep(sc, &sc->sc_mtx, PRIBIO | PDROP, "gj:work", 0); else { /* * If we have debug turned on, show number of elements * in various queues. 
*/ for (;;) { error = msleep(sc, &sc->sc_mtx, PRIBIO, "gj:work", hz * 3); if (error == 0) { mtx_unlock(&sc->sc_mtx); break; } GJ_DEBUG(3, "Report: current count=%d", sc->sc_current_count); GJ_DEBUG(3, "Report: flush count=%d", sc->sc_flush_count); GJ_DEBUG(3, "Report: flush in progress=%d", sc->sc_flush_in_progress); GJ_DEBUG(3, "Report: copy in progress=%d", sc->sc_copy_in_progress); GJ_DEBUG(3, "Report: delayed=%d", sc->sc_delayed_count); } } GJ_DEBUG(3, "%s: exit 1", __func__); return; } /* * Flush even not full records every 3 seconds. */ timeout = (last_write + 3 - time_second) * hz; if (timeout <= 0) { mtx_unlock(&sc->sc_mtx); g_journal_flush(sc); g_journal_flush_send(sc); GJ_DEBUG(3, "%s: exit 2", __func__); return; } error = msleep(sc, &sc->sc_mtx, PRIBIO | PDROP, "gj:work", timeout); if (error == EWOULDBLOCK) g_journal_flush_send(sc); GJ_DEBUG(3, "%s: exit 3", __func__); } /* * Worker thread. */ static void g_journal_worker(void *arg) { struct g_journal_softc *sc; struct g_geom *gp; struct g_provider *pp; struct bio *bp; time_t last_write; int type; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); sc = arg; type = 0; /* gcc */ if (sc->sc_flags & GJF_DEVICE_CLEAN) { GJ_DEBUG(0, "Journal %s clean.", sc->sc_name); g_journal_initialize(sc); } else { g_journal_sync(sc); } /* * Check if we can use BIO_FLUSH. */ sc->sc_bio_flush = 0; if (g_io_flush(sc->sc_jconsumer) == 0) { sc->sc_bio_flush |= GJ_FLUSH_JOURNAL; GJ_DEBUG(1, "BIO_FLUSH supported by %s.", sc->sc_jconsumer->provider->name); } else { GJ_DEBUG(0, "BIO_FLUSH not supported by %s.", sc->sc_jconsumer->provider->name); } if (sc->sc_jconsumer != sc->sc_dconsumer) { if (g_io_flush(sc->sc_dconsumer) == 0) { sc->sc_bio_flush |= GJ_FLUSH_DATA; GJ_DEBUG(1, "BIO_FLUSH supported by %s.", sc->sc_dconsumer->provider->name); } else { GJ_DEBUG(0, "BIO_FLUSH not supported by %s.", sc->sc_dconsumer->provider->name); } } gp = sc->sc_geom; g_topology_lock(); pp = g_new_providerf(gp, "%s.journal", sc->sc_name); pp->mediasize = sc->sc_mediasize; /* * There could be a problem when data provider and journal providers * have different sectorsize, but such scenario is prevented on journal * creation. */ pp->sectorsize = sc->sc_sectorsize; g_error_provider(pp, 0); g_topology_unlock(); last_write = time_second; if (sc->sc_rootmount != NULL) { GJ_DEBUG(1, "root_mount_rel %p", sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } for (;;) { /* Get first request from the queue. */ mtx_lock(&sc->sc_mtx); bp = bioq_first(&sc->sc_back_queue); if (bp != NULL) type = (bp->bio_cflags & GJ_BIO_MASK); if (bp == NULL) { bp = bioq_first(&sc->sc_regular_queue); if (bp != NULL) type = GJ_BIO_REGULAR; } if (bp == NULL) { try_switch: if ((sc->sc_flags & GJF_DEVICE_SWITCH) || (sc->sc_flags & GJF_DEVICE_DESTROY)) { if (sc->sc_current_count > 0) { mtx_unlock(&sc->sc_mtx); g_journal_flush(sc); g_journal_flush_send(sc); continue; } if (sc->sc_flush_in_progress > 0) goto sleep; if (sc->sc_copy_in_progress > 0) goto sleep; } if (sc->sc_flags & GJF_DEVICE_SWITCH) { mtx_unlock(&sc->sc_mtx); g_journal_switch(sc); wakeup(&sc->sc_journal_copying); continue; } if (sc->sc_flags & GJF_DEVICE_DESTROY) { GJ_DEBUG(1, "Shutting down worker " "thread for %s.", gp->name); sc->sc_worker = NULL; wakeup(&sc->sc_worker); mtx_unlock(&sc->sc_mtx); kproc_exit(0); } sleep: g_journal_wait(sc, last_write); continue; } /* * If we're in switch process, we need to delay all new * write requests until its done. 
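 * (The WRITE below stays on sc_regular_queue and the worker jumps back to
 *  the switch logic; reads and internal journal bios are still handled.)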
*/ if ((sc->sc_flags & GJF_DEVICE_SWITCH) && type == GJ_BIO_REGULAR && bp->bio_cmd == BIO_WRITE) { GJ_LOGREQ(2, bp, "WRITE on SWITCH"); goto try_switch; } if (type == GJ_BIO_REGULAR) bioq_remove(&sc->sc_regular_queue, bp); else bioq_remove(&sc->sc_back_queue, bp); mtx_unlock(&sc->sc_mtx); switch (type) { case GJ_BIO_REGULAR: /* Regular request. */ switch (bp->bio_cmd) { case BIO_READ: g_journal_read(sc, bp, bp->bio_offset, bp->bio_offset + bp->bio_length); break; case BIO_WRITE: last_write = time_second; g_journal_add_request(sc, bp); g_journal_flush_send(sc); break; default: panic("Invalid bio_cmd (%d).", bp->bio_cmd); } break; case GJ_BIO_COPY: switch (bp->bio_cmd) { case BIO_READ: if (g_journal_copy_read_done(bp)) g_journal_copy_send(sc); break; case BIO_WRITE: g_journal_copy_write_done(bp); g_journal_copy_send(sc); break; default: panic("Invalid bio_cmd (%d).", bp->bio_cmd); } break; case GJ_BIO_JOURNAL: g_journal_flush_done(bp); g_journal_flush_send(sc); break; case GJ_BIO_READ: default: panic("Invalid bio (%d).", type); } } } static void g_journal_destroy_event(void *arg, int flags __unused) { struct g_journal_softc *sc; g_topology_assert(); sc = arg; g_journal_destroy(sc); } static void g_journal_timeout(void *arg) { struct g_journal_softc *sc; sc = arg; GJ_DEBUG(0, "Timeout. Journal %s cannot be completed.", sc->sc_geom->name); g_post_event(g_journal_destroy_event, sc, M_NOWAIT, NULL); } static struct g_geom * g_journal_create(struct g_class *mp, struct g_provider *pp, const struct g_journal_metadata *md) { struct g_journal_softc *sc; struct g_geom *gp; struct g_consumer *cp; int error; sc = NULL; /* gcc */ g_topology_assert(); /* * There are two possibilities: * 1. Data and both journals are on the same provider. * 2. Data and journals are all on separated providers. */ /* Look for journal device with the same ID. */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_id == md->md_id) break; } if (gp == NULL) sc = NULL; else if (sc != NULL && (sc->sc_type & md->md_type) != 0) { GJ_DEBUG(1, "Journal device %u already configured.", sc->sc_id); return (NULL); } if (md->md_type == 0 || (md->md_type & ~GJ_TYPE_COMPLETE) != 0) { GJ_DEBUG(0, "Invalid type on %s.", pp->name); return (NULL); } if (md->md_type & GJ_TYPE_DATA) { GJ_DEBUG(0, "Journal %u: %s contains data.", md->md_id, pp->name); } if (md->md_type & GJ_TYPE_JOURNAL) { GJ_DEBUG(0, "Journal %u: %s contains journal.", md->md_id, pp->name); } if (sc == NULL) { /* Action geom. */ sc = malloc(sizeof(*sc), M_JOURNAL, M_WAITOK | M_ZERO); sc->sc_id = md->md_id; sc->sc_type = 0; sc->sc_flags = 0; sc->sc_worker = NULL; gp = g_new_geomf(mp, "gjournal %u", sc->sc_id); gp->start = g_journal_start; gp->orphan = g_journal_orphan; gp->access = g_journal_access; gp->softc = sc; gp->flags |= G_GEOM_VOLATILE_BIO; sc->sc_geom = gp; mtx_init(&sc->sc_mtx, "gjournal", NULL, MTX_DEF); bioq_init(&sc->sc_back_queue); bioq_init(&sc->sc_regular_queue); bioq_init(&sc->sc_delayed_queue); sc->sc_delayed_count = 0; sc->sc_current_queue = NULL; sc->sc_current_count = 0; sc->sc_flush_queue = NULL; sc->sc_flush_count = 0; sc->sc_flush_in_progress = 0; sc->sc_copy_queue = NULL; sc->sc_copy_in_progress = 0; sc->sc_inactive.jj_queue = NULL; sc->sc_active.jj_queue = NULL; sc->sc_rootmount = root_mount_hold("GJOURNAL"); GJ_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); callout_init(&sc->sc_callout, 1); if (md->md_type != GJ_TYPE_COMPLETE) { /* * Journal and data are on separate providers. 
* At this point we have only one of them. * We setup a timeout in case the other part will not * appear, so we won't wait forever. */ callout_reset(&sc->sc_callout, 5 * hz, g_journal_timeout, sc); } } /* Remember type of the data provider. */ if (md->md_type & GJ_TYPE_DATA) sc->sc_orig_type = md->md_type; sc->sc_type |= md->md_type; cp = NULL; if (md->md_type & GJ_TYPE_DATA) { if (md->md_flags & GJ_FLAG_CLEAN) sc->sc_flags |= GJF_DEVICE_CLEAN; if (md->md_flags & GJ_FLAG_CHECKSUM) sc->sc_flags |= GJF_DEVICE_CHECKSUM; cp = g_new_consumer(gp); error = g_attach(cp, pp); KASSERT(error == 0, ("Cannot attach to %s (error=%d).", pp->name, error)); error = g_access(cp, 1, 1, 1); if (error != 0) { GJ_DEBUG(0, "Cannot access %s (error=%d).", pp->name, error); g_journal_destroy(sc); return (NULL); } sc->sc_dconsumer = cp; sc->sc_mediasize = pp->mediasize - pp->sectorsize; sc->sc_sectorsize = pp->sectorsize; sc->sc_jstart = md->md_jstart; sc->sc_jend = md->md_jend; if (md->md_provider[0] != '\0') sc->sc_flags |= GJF_DEVICE_HARDCODED; sc->sc_journal_offset = md->md_joffset; sc->sc_journal_id = md->md_jid; sc->sc_journal_previous_id = md->md_jid; } if (md->md_type & GJ_TYPE_JOURNAL) { if (cp == NULL) { cp = g_new_consumer(gp); error = g_attach(cp, pp); KASSERT(error == 0, ("Cannot attach to %s (error=%d).", pp->name, error)); error = g_access(cp, 1, 1, 1); if (error != 0) { GJ_DEBUG(0, "Cannot access %s (error=%d).", pp->name, error); g_journal_destroy(sc); return (NULL); } } else { /* * Journal is on the same provider as data, which means * that data provider ends where journal starts. */ sc->sc_mediasize = md->md_jstart; } sc->sc_jconsumer = cp; } /* Start switcher kproc if needed. */ if (g_journal_switcher_proc == NULL) g_journal_start_switcher(mp); if ((sc->sc_type & GJ_TYPE_COMPLETE) != GJ_TYPE_COMPLETE) { /* Journal is not complete yet. */ return (gp); } else { /* Journal complete, cancel timeout. */ callout_drain(&sc->sc_callout); } error = kproc_create(g_journal_worker, sc, &sc->sc_worker, 0, 0, "g_journal %s", sc->sc_name); if (error != 0) { GJ_DEBUG(0, "Cannot create worker thread for %s.journal.", sc->sc_name); g_journal_destroy(sc); return (NULL); } return (gp); } static void g_journal_destroy_consumer(void *arg, int flags __unused) { struct g_consumer *cp; g_topology_assert(); cp = arg; g_detach(cp); g_destroy_consumer(cp); } static int g_journal_destroy(struct g_journal_softc *sc) { struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL) { if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) { GJ_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } g_error_provider(pp, ENXIO); g_journal_flush(sc); g_journal_flush_send(sc); g_journal_switch(sc); } sc->sc_flags |= (GJF_DEVICE_DESTROY | GJF_DEVICE_CLEAN); g_topology_unlock(); if (sc->sc_rootmount != NULL) { GJ_DEBUG(1, "root_mount_rel %p", sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } callout_drain(&sc->sc_callout); mtx_lock(&sc->sc_mtx); wakeup(sc); while (sc->sc_worker != NULL) msleep(&sc->sc_worker, &sc->sc_mtx, PRIBIO, "gj:destroy", 0); mtx_unlock(&sc->sc_mtx); if (pp != NULL) { GJ_DEBUG(1, "Marking %s as clean.", sc->sc_name); g_journal_metadata_update(sc); g_topology_lock(); g_wither_provider(pp, ENXIO); } else { g_topology_lock(); } mtx_destroy(&sc->sc_mtx); if (sc->sc_current_count != 0) { GJ_DEBUG(0, "Warning! 
Number of current requests %d.", sc->sc_current_count); } gp->softc = NULL; LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->acr + cp->acw + cp->ace > 0) g_access(cp, -1, -1, -1); /* * We keep all consumers open for writting, so if I'll detach * and destroy consumer here, I'll get providers for taste, so * journal will be started again. * Sending an event here, prevents this from happening. */ g_post_event(g_journal_destroy_consumer, cp, M_WAITOK, NULL); } g_wither_geom(gp, ENXIO); free(sc, M_JOURNAL); return (0); } static void g_journal_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_journal_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_journal_metadata md; struct g_consumer *cp; struct g_geom *gp; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); GJ_DEBUG(2, "Tasting %s.", pp->name); if (pp->geom->class == mp) return (NULL); gp = g_new_geomf(mp, "journal:taste"); /* This orphan function should be never called. */ gp->orphan = g_journal_taste_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_journal_metadata_read(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) return (NULL); if (g_journal_debug >= 2) journal_metadata_dump(&md); gp = g_journal_create(mp, pp, &md); return (gp); } static struct g_journal_softc * g_journal_find_device(struct g_class *mp, const char *name) { struct g_journal_softc *sc; struct g_geom *gp; struct g_provider *pp; if (strncmp(name, "/dev/", 5) == 0) name += 5; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_flags & GJF_DEVICE_DESTROY) continue; if ((sc->sc_type & GJ_TYPE_COMPLETE) != GJ_TYPE_COMPLETE) continue; pp = LIST_FIRST(&gp->provider); if (strcmp(sc->sc_name, name) == 0) return (sc); if (pp != NULL && strcmp(pp->name, name) == 0) return (sc); } return (NULL); } static void g_journal_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_journal_softc *sc; const char *name; char param[16]; int *nargs; int error, i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument.", i); return; } sc = g_journal_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_journal_destroy(sc); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", LIST_FIRST(&sc->sc_geom->provider)->name, error); return; } } } static void g_journal_ctl_sync(struct gctl_req *req __unused, struct g_class *mp __unused) { g_topology_assert(); g_topology_unlock(); g_journal_sync_requested++; wakeup(&g_journal_switcher_state); while (g_journal_sync_requested > 0) tsleep(&g_journal_sync_requested, PRIBIO, "j:sreq", hz / 2); g_topology_lock(); } static void g_journal_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == 
NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_JOURNAL_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_journal_ctl_destroy(req, mp); return; } else if (strcmp(verb, "sync") == 0) { g_journal_ctl_sync(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_journal_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_journal_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. */ } else if (cp != NULL) { int first = 1; sbuf_printf(sb, "%s", indent); if (cp == sc->sc_dconsumer) { sbuf_cat(sb, "Data"); first = 0; } if (cp == sc->sc_jconsumer) { if (!first) sbuf_cat(sb, ","); sbuf_cat(sb, "Journal"); } sbuf_cat(sb, "\n"); if (cp == sc->sc_jconsumer) { sbuf_printf(sb, "%jd\n", (intmax_t)sc->sc_jstart); sbuf_printf(sb, "%jd\n", (intmax_t)sc->sc_jend); } } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); } } static eventhandler_tag g_journal_event_shutdown = NULL; static eventhandler_tag g_journal_event_lowmem = NULL; static void g_journal_shutdown(void *arg, int howto __unused) { struct g_class *mp; struct g_geom *gp, *gp2; if (panicstr != NULL) return; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if (gp->softc == NULL) continue; GJ_DEBUG(0, "Shutting down geom %s.", gp->name); g_journal_destroy(gp->softc); } g_topology_unlock(); } /* * Free cached requests from inactive queue in case of low memory. * We free GJ_FREE_AT_ONCE elements at once. */ #define GJ_FREE_AT_ONCE 4 static void g_journal_lowmem(void *arg, int howto __unused) { struct g_journal_softc *sc; struct g_class *mp; struct g_geom *gp; struct bio *bp; u_int nfree = GJ_FREE_AT_ONCE; g_journal_stats_low_mem++; mp = arg; g_topology_lock(); LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL || (sc->sc_flags & GJF_DEVICE_DESTROY)) continue; mtx_lock(&sc->sc_mtx); for (bp = sc->sc_inactive.jj_queue; nfree > 0 && bp != NULL; nfree--, bp = bp->bio_next) { /* * This is safe to free the bio_data, because: * 1. If bio_data is NULL it will be read from the * inactive journal. * 2. If bp is sent down, it is first removed from the * inactive queue, so it's impossible to free the * data from under in-flight bio. * On the other hand, freeing elements from the active * queue, is not safe. */ if (bp->bio_data != NULL) { GJ_DEBUG(2, "Freeing data from %s.", sc->sc_name); gj_free(bp->bio_data, bp->bio_length); bp->bio_data = NULL; } } mtx_unlock(&sc->sc_mtx); if (nfree == 0) break; } g_topology_unlock(); } static void g_journal_switcher(void *arg); static void g_journal_init(struct g_class *mp) { /* Pick a conservative value if provided value sucks. */ if (g_journal_cache_divisor <= 0 || (vm_kmem_size / g_journal_cache_divisor == 0)) { g_journal_cache_divisor = 5; } if (g_journal_cache_limit > 0) { g_journal_cache_limit = vm_kmem_size / g_journal_cache_divisor; g_journal_cache_low = (g_journal_cache_limit / 100) * g_journal_cache_switch; } g_journal_event_shutdown = EVENTHANDLER_REGISTER(shutdown_post_sync, g_journal_shutdown, mp, EVENTHANDLER_PRI_FIRST); if (g_journal_event_shutdown == NULL) GJ_DEBUG(0, "Warning! Cannot register shutdown event."); g_journal_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, g_journal_lowmem, mp, EVENTHANDLER_PRI_FIRST); if (g_journal_event_lowmem == NULL) GJ_DEBUG(0, "Warning! 
Cannot register lowmem event."); } static void g_journal_fini(struct g_class *mp) { if (g_journal_event_shutdown != NULL) { EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_journal_event_shutdown); } if (g_journal_event_lowmem != NULL) EVENTHANDLER_DEREGISTER(vm_lowmem, g_journal_event_lowmem); g_journal_stop_switcher(); } DECLARE_GEOM_CLASS(g_journal_class, g_journal); static const struct g_journal_desc * g_journal_find_desc(const char *fstype) { const struct g_journal_desc *desc; int i; for (desc = g_journal_filesystems[i = 0]; desc != NULL; desc = g_journal_filesystems[++i]) { if (strcmp(desc->jd_fstype, fstype) == 0) break; } return (desc); } static void g_journal_switch_wait(struct g_journal_softc *sc) { struct bintime bt; mtx_assert(&sc->sc_mtx, MA_OWNED); if (g_journal_debug >= 2) { if (sc->sc_flush_in_progress > 0) { GJ_DEBUG(2, "%d requests flushing.", sc->sc_flush_in_progress); } if (sc->sc_copy_in_progress > 0) { GJ_DEBUG(2, "%d requests copying.", sc->sc_copy_in_progress); } if (sc->sc_flush_count > 0) { GJ_DEBUG(2, "%d requests to flush.", sc->sc_flush_count); } if (sc->sc_delayed_count > 0) { GJ_DEBUG(2, "%d requests delayed.", sc->sc_delayed_count); } } g_journal_stats_switches++; if (sc->sc_copy_in_progress > 0) g_journal_stats_wait_for_copy++; GJ_TIMER_START(1, &bt); sc->sc_flags &= ~GJF_DEVICE_BEFORE_SWITCH; sc->sc_flags |= GJF_DEVICE_SWITCH; wakeup(sc); while (sc->sc_flags & GJF_DEVICE_SWITCH) { msleep(&sc->sc_journal_copying, &sc->sc_mtx, PRIBIO, "gj:switch", 0); } GJ_TIMER_STOP(1, &bt, "Switch time of %s", sc->sc_name); } static void g_journal_do_switch(struct g_class *classp) { struct g_journal_softc *sc; const struct g_journal_desc *desc; struct g_geom *gp; struct mount *mp; struct bintime bt; char *mountpoint; int error, save; g_topology_lock(); LIST_FOREACH(gp, &classp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_flags & GJF_DEVICE_DESTROY) continue; if ((sc->sc_type & GJ_TYPE_COMPLETE) != GJ_TYPE_COMPLETE) continue; mtx_lock(&sc->sc_mtx); sc->sc_flags |= GJF_DEVICE_BEFORE_SWITCH; mtx_unlock(&sc->sc_mtx); } g_topology_unlock(); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_gjprovider == NULL) continue; if (mp->mnt_flag & MNT_RDONLY) continue; desc = g_journal_find_desc(mp->mnt_stat.f_fstypename); if (desc == NULL) continue; if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; /* mtx_unlock(&mountlist_mtx) was done inside vfs_busy() */ g_topology_lock(); sc = g_journal_find_device(classp, mp->mnt_gjprovider); g_topology_unlock(); if (sc == NULL) { GJ_DEBUG(0, "Cannot find journal geom for %s.", mp->mnt_gjprovider); goto next; } else if (JEMPTY(sc)) { mtx_lock(&sc->sc_mtx); sc->sc_flags &= ~GJF_DEVICE_BEFORE_SWITCH; mtx_unlock(&sc->sc_mtx); GJ_DEBUG(3, "No need for %s switch.", sc->sc_name); goto next; } mountpoint = mp->mnt_stat.f_mntonname; error = vn_start_write(NULL, &mp, V_WAIT); if (error != 0) { GJ_DEBUG(0, "vn_start_write(%s) failed (error=%d).", mountpoint, error); goto next; } save = curthread_pflags_set(TDP_SYNCIO); GJ_TIMER_START(1, &bt); vfs_msync(mp, MNT_NOWAIT); GJ_TIMER_STOP(1, &bt, "Msync time of %s", mountpoint); GJ_TIMER_START(1, &bt); error = VFS_SYNC(mp, MNT_NOWAIT); if (error == 0) GJ_TIMER_STOP(1, &bt, "Sync time of %s", mountpoint); else { GJ_DEBUG(0, "Cannot sync file system %s (error=%d).", mountpoint, error); } curthread_pflags_restore(save); vn_finished_write(mp); if (error != 0) goto next; /* * Send BIO_FLUSH before freezing the file system, so it can be * faster after the freeze. 
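g_journal_init() above sizes the journal cache as vm_kmem_size divided by the cache divisor, with the low-water mark taken as a percentage of that limit. A small sketch of the same arithmetic; the concrete numbers are invented and only illustrate the shape of the computation.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Illustrative stand-ins for the kernel values. */
	uint64_t vm_kmem_size = 4ULL * 1024 * 1024 * 1024;	/* 4 GB of kernel VM */
	unsigned divisor = 5;		/* cache divisor, as defaulted above */
	unsigned switch_pct = 90;	/* percentage that triggers a switch (invented) */

	uint64_t limit = vm_kmem_size / divisor;
	uint64_t low = (limit / 100) * switch_pct;

	printf("cache limit %ju, low-water mark %ju\n",
	    (uintmax_t)limit, (uintmax_t)low);
	return (0);
}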
*/ GJ_TIMER_START(1, &bt); g_journal_flush_cache(sc); GJ_TIMER_STOP(1, &bt, "BIO_FLUSH time of %s", sc->sc_name); GJ_TIMER_START(1, &bt); error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT); GJ_TIMER_STOP(1, &bt, "Suspend time of %s", mountpoint); if (error != 0) { GJ_DEBUG(0, "Cannot suspend file system %s (error=%d).", mountpoint, error); goto next; } error = desc->jd_clean(mp); if (error != 0) goto next; mtx_lock(&sc->sc_mtx); g_journal_switch_wait(sc); mtx_unlock(&sc->sc_mtx); vfs_write_resume(mp, 0); next: mtx_lock(&mountlist_mtx); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); sc = NULL; for (;;) { g_topology_lock(); LIST_FOREACH(gp, &g_journal_class.geom, geom) { sc = gp->softc; if (sc == NULL) continue; mtx_lock(&sc->sc_mtx); if ((sc->sc_type & GJ_TYPE_COMPLETE) == GJ_TYPE_COMPLETE && !(sc->sc_flags & GJF_DEVICE_DESTROY) && (sc->sc_flags & GJF_DEVICE_BEFORE_SWITCH)) { break; } mtx_unlock(&sc->sc_mtx); sc = NULL; } g_topology_unlock(); if (sc == NULL) break; mtx_assert(&sc->sc_mtx, MA_OWNED); g_journal_switch_wait(sc); mtx_unlock(&sc->sc_mtx); } } static void g_journal_start_switcher(struct g_class *mp) { int error; g_topology_assert(); MPASS(g_journal_switcher_proc == NULL); g_journal_switcher_state = GJ_SWITCHER_WORKING; error = kproc_create(g_journal_switcher, mp, &g_journal_switcher_proc, 0, 0, "g_journal switcher"); KASSERT(error == 0, ("Cannot create switcher thread.")); } static void g_journal_stop_switcher(void) { g_topology_assert(); MPASS(g_journal_switcher_proc != NULL); g_journal_switcher_state = GJ_SWITCHER_DIE; wakeup(&g_journal_switcher_state); while (g_journal_switcher_state != GJ_SWITCHER_DIED) tsleep(&g_journal_switcher_state, PRIBIO, "jfini:wait", hz / 5); GJ_DEBUG(1, "Switcher died."); g_journal_switcher_proc = NULL; } /* * TODO: Kill switcher thread on last geom destruction? */ static void g_journal_switcher(void *arg) { struct g_class *mp; struct bintime bt; int error; mp = arg; curthread->td_pflags |= TDP_NORUNNINGBUF; for (;;) { g_journal_switcher_wokenup = 0; error = tsleep(&g_journal_switcher_state, PRIBIO, "jsw:wait", g_journal_switch_time * hz); if (g_journal_switcher_state == GJ_SWITCHER_DIE) { g_journal_switcher_state = GJ_SWITCHER_DIED; GJ_DEBUG(1, "Switcher exiting."); wakeup(&g_journal_switcher_state); kproc_exit(0); } if (error == 0 && g_journal_sync_requested == 0) { GJ_DEBUG(1, "Out of cache, force switch (used=%jd " "limit=%jd).", (intmax_t)g_journal_cache_used, (intmax_t)g_journal_cache_limit); } GJ_TIMER_START(1, &bt); g_journal_do_switch(mp); GJ_TIMER_STOP(1, &bt, "Entire switch time"); if (g_journal_sync_requested > 0) { g_journal_sync_requested = 0; wakeup(&g_journal_sync_requested); } } } Index: head/sys/geom/journal/g_journal.h =================================================================== --- head/sys/geom/journal/g_journal.h (revision 350693) +++ head/sys/geom/journal/g_journal.h (revision 350694) @@ -1,394 +1,376 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
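The switcher kproc above sleeps with a timeout, is woken early for forced syncs, and is torn down through a WORKING -> DIE -> DIED handshake so g_journal_stop_switcher() can wait for the thread to exit. The following is only a userland analogue of that handshake, with pthreads and a condition variable standing in for tsleep()/wakeup(); none of the identifiers below are kernel APIs.

#include <pthread.h>
#include <stdio.h>
#include <time.h>

enum { SW_WORKING, SW_DIE, SW_DIED };

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int state = SW_WORKING;

static void *
switcher(void *arg)
{
	struct timespec ts;

	(void)arg;
	pthread_mutex_lock(&mtx);
	for (;;) {
		/* Sleep with a timeout, like tsleep(..., switch_time * hz). */
		clock_gettime(CLOCK_REALTIME, &ts);
		ts.tv_sec += 1;
		pthread_cond_timedwait(&cv, &mtx, &ts);
		if (state == SW_DIE) {
			state = SW_DIED;
			pthread_cond_broadcast(&cv);	/* like wakeup() */
			break;
		}
		printf("periodic or requested switch\n");
	}
	pthread_mutex_unlock(&mtx);
	return (NULL);
}

int
main(void)
{
	pthread_t td;

	pthread_create(&td, NULL, switcher, NULL);
	pthread_mutex_lock(&mtx);
	state = SW_DIE;			/* like g_journal_stop_switcher() */
	pthread_cond_broadcast(&cv);
	while (state != SW_DIED)
		pthread_cond_wait(&cv, &mtx);
	pthread_mutex_unlock(&mtx);
	pthread_join(td, NULL);
	return (0);
}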
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_JOURNAL_H_ #define _G_JOURNAL_H_ #include #include #ifdef _KERNEL #include #endif #define G_JOURNAL_CLASS_NAME "JOURNAL" #define G_JOURNAL_MAGIC "GEOM::JOURNAL" /* * Version history: * 0 - Initial version number. */ #define G_JOURNAL_VERSION 0 #ifdef _KERNEL extern int g_journal_debug; -#define GJ_DEBUG(lvl, ...) do { \ - if (g_journal_debug >= (lvl)) { \ - printf("GEOM_JOURNAL"); \ - if (g_journal_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define GJ_LOGREQ(lvl, bp, ...) do { \ - if (g_journal_debug >= (lvl)) { \ - printf("GEOM_JOURNAL"); \ - if (g_journal_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define GJ_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_JOURNAL", g_journal_debug, (lvl), NULL, __VA_ARGS__) +#define GJ_LOGREQ(lvl, bp, ...) \ + _GEOM_DEBUG("GEOM_JOURNAL", g_journal_debug, (lvl), (bp), __VA_ARGS__) #define JEMPTY(sc) ((sc)->sc_journal_offset - \ (sc)->sc_jprovider->sectorsize == \ (sc)->sc_active.jj_offset && \ (sc)->sc_current_count == 0) #define GJ_BIO_REGULAR 0x00 #define GJ_BIO_READ 0x01 #define GJ_BIO_JOURNAL 0x02 #define GJ_BIO_COPY 0x03 #define GJ_BIO_MASK 0x0f #if 0 #define GJF_BIO_DONT_FREE 0x10 #define GJF_BIO_MASK 0xf0 #endif #define GJF_DEVICE_HARDCODED 0x0001 #define GJF_DEVICE_DESTROY 0x0010 #define GJF_DEVICE_SWITCH 0x0020 #define GJF_DEVICE_BEFORE_SWITCH 0x0040 #define GJF_DEVICE_CLEAN 0x0080 #define GJF_DEVICE_CHECKSUM 0x0100 #define GJ_HARD_LIMIT 64 /* * We keep pointers to journaled data in bio structure and because we * need to store two off_t values (offset in data provider and offset in * journal), we have to borrow bio_completed field for this. */ #define bio_joffset bio_completed /* * Use bio_caller1 field as a pointer in queue. */ #define bio_next bio_caller1 /* * There are two such structures maintained inside each journaled device. * One describes active part of the journal, were recent requests are stored. * The second describes the last consistent part of the journal with requests * that are copied to the destination provider. */ struct g_journal_journal { struct bio *jj_queue; /* Cached journal entries. */ off_t jj_offset; /* Journal's start offset. 
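The only functional change in this header is that the open-coded GJ_DEBUG/GJ_LOGREQ bodies are folded into a shared _GEOM_DEBUG helper (presumably provided by a common GEOM debug header added elsewhere in this revision; the include list above does not show its name). What such a leveled, prefix-printing macro boils down to can be sketched standalone with plain printf; the DBG name and the hard-coded level below are illustrative only.

#include <stdio.h>

static int g_journal_debug = 2;		/* stand-in for the sysctl-tunable level */

/*
 * Same shape as the removed GJ_DEBUG body: print the class prefix, the
 * message level when debugging is enabled, the caller's message, a newline.
 */
#define DBG(prefix, lvl_var, lvl, ...) do {				\
	if ((lvl_var) >= (lvl)) {					\
		printf("%s", prefix);					\
		if ((lvl_var) > 0)					\
			printf("[%u]", (unsigned)(lvl));		\
		printf(": ");						\
		printf(__VA_ARGS__);					\
		printf("\n");						\
	}								\
} while (0)

#define GJ_DEBUG(lvl, ...)	DBG("GEOM_JOURNAL", g_journal_debug, (lvl), __VA_ARGS__)

int
main(void)
{
	GJ_DEBUG(1, "Tasting %s.", "ada0p2");
	GJ_DEBUG(3, "suppressed: level %d is above the threshold", 3);
	return (0);
}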
*/ }; struct g_journal_softc { uint32_t sc_id; uint8_t sc_type; uint8_t sc_orig_type; struct g_geom *sc_geom; u_int sc_flags; struct mtx sc_mtx; off_t sc_mediasize; u_int sc_sectorsize; #define GJ_FLUSH_DATA 0x01 #define GJ_FLUSH_JOURNAL 0x02 u_int sc_bio_flush; uint32_t sc_journal_id; uint32_t sc_journal_next_id; int sc_journal_copying; off_t sc_journal_offset; off_t sc_journal_previous_id; struct bio_queue_head sc_back_queue; struct bio_queue_head sc_regular_queue; struct bio_queue_head sc_delayed_queue; int sc_delayed_count; struct bio *sc_current_queue; int sc_current_count; struct bio *sc_flush_queue; int sc_flush_count; int sc_flush_in_progress; struct bio *sc_copy_queue; int sc_copy_in_progress; struct g_consumer *sc_dconsumer; struct g_consumer *sc_jconsumer; struct g_journal_journal sc_inactive; struct g_journal_journal sc_active; off_t sc_jstart; /* Journal space start offset. */ off_t sc_jend; /* Journal space end offset. */ struct callout sc_callout; struct proc *sc_worker; struct root_hold_token *sc_rootmount; }; #define sc_dprovider sc_dconsumer->provider #define sc_jprovider sc_jconsumer->provider #define sc_name sc_dprovider->name #define GJQ_INSERT_HEAD(head, bp) do { \ (bp)->bio_next = (head); \ (head) = (bp); \ } while (0) #define GJQ_INSERT_AFTER(head, bp, pbp) do { \ if ((pbp) == NULL) \ GJQ_INSERT_HEAD(head, bp); \ else { \ (bp)->bio_next = (pbp)->bio_next; \ (pbp)->bio_next = (bp); \ } \ } while (0) #define GJQ_LAST(head, bp) do { \ struct bio *_bp; \ \ if ((head) == NULL) { \ (bp) = (head); \ break; \ } \ for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next) \ continue; \ (bp) = (_bp); \ } while (0) #define GJQ_FIRST(head) (head) #define GJQ_REMOVE(head, bp) do { \ struct bio *_bp; \ \ if ((head) == (bp)) { \ (head) = (bp)->bio_next; \ (bp)->bio_next = NULL; \ break; \ } \ for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next) {\ if (_bp->bio_next == (bp)) \ break; \ } \ KASSERT(_bp->bio_next != NULL, ("NULL bio_next")); \ KASSERT(_bp->bio_next == (bp), ("bio_next != bp")); \ _bp->bio_next = (bp)->bio_next; \ (bp)->bio_next = NULL; \ } while (0) #define GJQ_FOREACH(head, bp) \ for ((bp) = (head); (bp) != NULL; (bp) = (bp)->bio_next) #define GJ_HEADER_MAGIC "GJHDR" struct g_journal_header { char jh_magic[sizeof(GJ_HEADER_MAGIC)]; uint32_t jh_journal_id; uint32_t jh_journal_next_id; } __packed; struct g_journal_entry { uint64_t je_joffset; uint64_t je_offset; uint64_t je_length; } __packed; #define GJ_RECORD_HEADER_MAGIC "GJRHDR" #define GJ_RECORD_HEADER_NENTRIES (20) #define GJ_RECORD_MAX_SIZE(sc) \ ((sc)->sc_jprovider->sectorsize + GJ_RECORD_HEADER_NENTRIES * MAXPHYS) #define GJ_VALIDATE_OFFSET(offset, sc) do { \ if ((offset) + GJ_RECORD_MAX_SIZE(sc) >= (sc)->sc_jend) { \ (offset) = (sc)->sc_jstart; \ GJ_DEBUG(2, "Starting from the beginning (%s).", \ (sc)->sc_name); \ } \ } while (0) struct g_journal_record_header { char jrh_magic[sizeof(GJ_RECORD_HEADER_MAGIC)]; uint32_t jrh_journal_id; uint16_t jrh_nentries; u_char jrh_sum[8]; struct g_journal_entry jrh_entries[GJ_RECORD_HEADER_NENTRIES]; } __packed; typedef int (g_journal_clean_t)(struct mount *mp); typedef void (g_journal_dirty_t)(struct g_consumer *cp); struct g_journal_desc { const char *jd_fstype; g_journal_clean_t *jd_clean; g_journal_dirty_t *jd_dirty; }; /* Supported file systems. */ extern const struct g_journal_desc g_journal_ufs; #define GJ_TIMER_START(lvl, bt) do { \ if (g_journal_debug >= (lvl)) \ binuptime(bt); \ } while (0) #define GJ_TIMER_STOP(lvl, bt, ...) 
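The softc above threads several ad-hoc singly linked bio lists through bio_caller1 (aliased to bio_next) and manipulates them with the GJQ_* macros. The same insert/remove/walk pattern, modelled on a plain struct with no GEOM types so it can be compiled and run on its own:

#include <stdio.h>

struct node {
	int id;
	struct node *next;	/* plays the role of bio_next */
};

/* Mirrors GJQ_INSERT_HEAD. */
static void
insert_head(struct node **head, struct node *n)
{
	n->next = *head;
	*head = n;
}

/* Mirrors GJQ_REMOVE: walk to the predecessor of n, then unlink n. */
static void
remove_node(struct node **head, struct node *n)
{
	struct node *p;

	if (*head == NULL)
		return;
	if (*head == n) {
		*head = n->next;
		n->next = NULL;
		return;
	}
	for (p = *head; p->next != NULL && p->next != n; p = p->next)
		;
	if (p->next == n) {
		p->next = n->next;
		n->next = NULL;
	}
}

int
main(void)
{
	struct node a = { 1, NULL }, b = { 2, NULL }, *head = NULL, *p;

	insert_head(&head, &a);
	insert_head(&head, &b);			/* list is now b -> a */
	remove_node(&head, &a);
	for (p = head; p != NULL; p = p->next)	/* like GJQ_FOREACH */
		printf("%d\n", p->id);
	return (0);
}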
do { \ if (g_journal_debug >= (lvl)) { \ struct bintime _bt2; \ struct timeval _tv; \ \ binuptime(&_bt2); \ bintime_sub(&_bt2, bt); \ bintime2timeval(&_bt2, &_tv); \ printf("GEOM_JOURNAL"); \ if (g_journal_debug > 0) \ printf("[%u]", lvl); \ printf(": "); \ printf(__VA_ARGS__); \ printf(": %jd.%06jds\n", (intmax_t)_tv.tv_sec, \ (intmax_t)_tv.tv_usec); \ } \ } while (0) #endif /* _KERNEL */ #define GJ_TYPE_DATA 0x01 #define GJ_TYPE_JOURNAL 0x02 #define GJ_TYPE_COMPLETE (GJ_TYPE_DATA|GJ_TYPE_JOURNAL) #define GJ_FLAG_CLEAN 0x01 #define GJ_FLAG_CHECKSUM 0x02 struct g_journal_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ uint32_t md_id; /* Journal unique ID. */ uint8_t md_type; /* Provider type. */ uint64_t md_jstart; /* Journal space start offset. */ uint64_t md_jend; /* Journal space end offset. */ uint64_t md_joffset; /* Last known consistent journal offset. */ uint32_t md_jid; /* Last known consistent journal ID. */ uint64_t md_flags; /* Journal flags. */ char md_provider[16]; /* Hardcoded provider. */ uint64_t md_provsize; /* Provider's size. */ u_char md_hash[16]; /* MD5 hash. */ }; static __inline void journal_metadata_encode(struct g_journal_metadata *md, u_char *data) { MD5_CTX ctx; bcopy(md->md_magic, data, 16); le32enc(data + 16, md->md_version); le32enc(data + 20, md->md_id); *(data + 24) = md->md_type; le64enc(data + 25, md->md_jstart); le64enc(data + 33, md->md_jend); le64enc(data + 41, md->md_joffset); le32enc(data + 49, md->md_jid); le64enc(data + 53, md->md_flags); bcopy(md->md_provider, data + 61, 16); le64enc(data + 77, md->md_provsize); MD5Init(&ctx); MD5Update(&ctx, data, 85); MD5Final(md->md_hash, &ctx); bcopy(md->md_hash, data + 85, 16); } static __inline int journal_metadata_decode_v0(const u_char *data, struct g_journal_metadata *md) { MD5_CTX ctx; md->md_id = le32dec(data + 20); md->md_type = *(data + 24); md->md_jstart = le64dec(data + 25); md->md_jend = le64dec(data + 33); md->md_joffset = le64dec(data + 41); md->md_jid = le32dec(data + 49); md->md_flags = le64dec(data + 53); bcopy(data + 61, md->md_provider, 16); md->md_provsize = le64dec(data + 77); MD5Init(&ctx); MD5Update(&ctx, data, 85); MD5Final(md->md_hash, &ctx); if (bcmp(md->md_hash, data + 85, 16) != 0) return (EINVAL); return (0); } static __inline int journal_metadata_decode(const u_char *data, struct g_journal_metadata *md) { int error; bcopy(data, md->md_magic, 16); md->md_version = le32dec(data + 16); switch (md->md_version) { case 0: error = journal_metadata_decode_v0(data, md); break; default: error = EINVAL; break; } return (error); } static __inline void journal_metadata_dump(const struct g_journal_metadata *md) { static const char hex[] = "0123456789abcdef"; char hash[16 * 2 + 1]; u_int i; printf(" magic: %s\n", md->md_magic); printf(" version: %u\n", (u_int)md->md_version); printf(" id: %u\n", (u_int)md->md_id); printf(" type: %u\n", (u_int)md->md_type); printf(" start: %ju\n", (uintmax_t)md->md_jstart); printf(" end: %ju\n", (uintmax_t)md->md_jend); printf(" joffset: %ju\n", (uintmax_t)md->md_joffset); printf(" jid: %u\n", (u_int)md->md_jid); printf(" flags: %u\n", (u_int)md->md_flags); printf("hcprovider: %s\n", md->md_provider); printf(" provsize: %ju\n", (uintmax_t)md->md_provsize); bzero(hash, sizeof(hash)); for (i = 0; i < 16; i++) { hash[i * 2] = hex[md->md_hash[i] >> 4]; hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f]; } printf(" MD5 hash: %s\n", hash); } #endif /* !_G_JOURNAL_H_ */ Index: head/sys/geom/journal/g_journal_ufs.c 
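journal_metadata_encode()/decode() above serialize the metadata into a fixed little-endian layout: the 16-byte magic at offset 0, version at 16, id at 20, type at 24, the three 64-bit journal offsets from 25, the journal id at 49, flags at 53, the hardcoded provider name at 61, provsize at 77, and an MD5 digest over the first 85 bytes stored at 85. A standalone sketch of the same fixed-offset packing; the put_le helpers stand in for le32enc()/le64enc(), the field values are invented, and the MD5 step is omitted.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Minimal stand-ins for le32enc()/le64enc() from <sys/endian.h>. */
static void
put_le32(uint8_t *p, uint32_t v)
{
	p[0] = (uint8_t)v;
	p[1] = (uint8_t)(v >> 8);
	p[2] = (uint8_t)(v >> 16);
	p[3] = (uint8_t)(v >> 24);
}

static void
put_le64(uint8_t *p, uint64_t v)
{
	put_le32(p, (uint32_t)v);
	put_le32(p + 4, (uint32_t)(v >> 32));
}

int
main(void)
{
	uint8_t buf[101];		/* 85 payload bytes + 16-byte hash slot */

	memset(buf, 0, sizeof(buf));
	memcpy(buf, "GEOM::JOURNAL", 14);	/* md_magic, 16-byte field */
	put_le32(buf + 16, 0);			/* md_version */
	put_le32(buf + 20, 12345);		/* md_id */
	buf[24] = 0x03;				/* md_type: data + journal */
	put_le64(buf + 25, 1048576);		/* md_jstart */
	put_le64(buf + 33, 1073741824);		/* md_jend */
	put_le64(buf + 41, 1048576);		/* md_joffset */
	put_le32(buf + 49, 1);			/* md_jid */
	put_le64(buf + 53, 0);			/* md_flags */
	/* buf + 61: md_provider[16]; buf + 77: md_provsize; buf + 85: MD5. */
	printf("version byte at offset 16: 0x%02x\n", buf[16]);
	return (0);
}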
=================================================================== --- head/sys/geom/journal/g_journal_ufs.c (revision 350693) +++ head/sys/geom/journal/g_journal_ufs.c (revision 350694) @@ -1,104 +1,105 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include static int g_journal_ufs_clean(struct mount *mp) { struct ufsmount *ump; struct fs *fs; int flags; ump = VFSTOUFS(mp); fs = ump->um_fs; flags = fs->fs_flags; fs->fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); ffs_sbupdate(ump, MNT_WAIT, 1); fs->fs_flags = flags; return (0); } static void g_journal_ufs_dirty(struct g_consumer *cp) { struct fs *fs; int error; fs = NULL; if (SBLOCKSIZE % cp->provider->sectorsize != 0 || ffs_sbget(cp, &fs, STDSB, M_GEOM, g_use_g_read_data) != 0) { GJ_DEBUG(0, "Cannot find superblock to mark file system %s " "as dirty.", cp->provider->name); KASSERT(fs == NULL, ("g_journal_ufs_dirty: non-NULL fs %p\n", fs)); return; } GJ_DEBUG(0, "clean=%d flags=0x%x", fs->fs_clean, fs->fs_flags); fs->fs_clean = 0; fs->fs_flags |= FS_NEEDSFSCK | FS_UNCLEAN; error = ffs_sbput(cp, fs, fs->fs_sblockloc, g_use_g_write_data); g_free(fs->fs_csp); g_free(fs); if (error != 0) { GJ_DEBUG(0, "Cannot mark file system %s as dirty " "(error=%d).", cp->provider->name, error); } else { GJ_DEBUG(0, "File system %s marked as dirty.", cp->provider->name); } } const struct g_journal_desc g_journal_ufs = { .jd_fstype = "ufs", .jd_clean = g_journal_ufs_clean, .jd_dirty = g_journal_ufs_dirty }; MODULE_DEPEND(g_journal, ufs, 1, 1, 1); MODULE_VERSION(geom_journal, 0); Index: head/sys/geom/label/g_label.c =================================================================== --- head/sys/geom/label/g_label.c (revision 350693) +++ head/sys/geom/label/g_label.c (revision 350694) @@ -1,560 +1,561 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include FEATURE(geom_label, "GEOM labeling support"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, label, CTLFLAG_RW, 0, "GEOM_LABEL stuff"); u_int g_label_debug = 0; SYSCTL_UINT(_kern_geom_label, OID_AUTO, debug, CTLFLAG_RWTUN, &g_label_debug, 0, "Debug level"); static int g_label_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static int g_label_destroy(struct g_geom *gp, boolean_t force); static struct g_geom *g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused); static void g_label_config(struct gctl_req *req, struct g_class *mp, const char *verb); struct g_class g_label_class = { .name = G_LABEL_CLASS_NAME, .version = G_VERSION, .ctlreq = g_label_config, .taste = g_label_taste, .destroy_geom = g_label_destroy_geom }; /* * To add a new file system where you want to look for volume labels, * you have to: * 1. Add a file g_label_.c which implements labels recognition. * 2. Add an 'extern const struct g_label_desc g_label_;' into * g_label.h file. * 3. Add an element to the table below '&g_label_,'. * 4. Add your file to sys/conf/files. * 5. Add your file to sys/modules/geom/geom_label/Makefile. * 6. Add your file system to manual page sbin/geom/class/label/glabel.8. */ const struct g_label_desc *g_labels[] = { &g_label_gpt, &g_label_gpt_uuid, #ifdef GEOM_LABEL &g_label_ufs_id, &g_label_ufs_volume, &g_label_iso9660, &g_label_msdosfs, &g_label_ext2fs, &g_label_reiserfs, &g_label_ntfs, &g_label_disk_ident, &g_label_flashmap, #endif NULL }; void g_label_rtrim(char *label, size_t size) { ptrdiff_t i; for (i = size - 1; i >= 0; i--) { if (label[i] == '\0') continue; else if (label[i] == ' ') label[i] = '\0'; else break; } } static int g_label_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, struct g_geom *gp __unused) { /* * XXX: Unloading a class which is using geom_slice:1.56 is currently * XXX: broken, so we deny unloading when we have geoms. 
*/ return (EOPNOTSUPP); } static void g_label_orphan(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s removed.", LIST_FIRST(&cp->geom->provider)->name); g_slice_orphan(cp); } static void g_label_spoiled(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s removed.", LIST_FIRST(&cp->geom->provider)->name); g_slice_spoiled(cp); } static void g_label_resize(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s resized.", LIST_FIRST(&cp->geom->provider)->name); g_slice_config(cp->geom, 0, G_SLICE_CONFIG_FORCE, (off_t)0, cp->provider->mediasize, cp->provider->sectorsize, "notused"); } static int g_label_is_name_ok(const char *label) { const char *s; /* Check if the label starts from ../ */ if (strncmp(label, "../", 3) == 0) return (0); /* Check if the label contains /../ */ if (strstr(label, "/../") != NULL) return (0); /* Check if the label ends at ../ */ if ((s = strstr(label, "/..")) != NULL && s[3] == '\0') return (0); return (1); } static void g_label_mangle_name(char *label, size_t size) { struct sbuf *sb; const u_char *c; sb = sbuf_new(NULL, NULL, size, SBUF_FIXEDLEN); for (c = label; *c != '\0'; c++) { if (!isprint(*c) || isspace(*c) || *c =='"' || *c == '%') sbuf_printf(sb, "%%%02X", *c); else sbuf_putc(sb, *c); } if (sbuf_finish(sb) != 0) label[0] = '\0'; else strlcpy(label, sbuf_data(sb), size); sbuf_delete(sb); } static struct g_geom * g_label_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, const char *label, const char *dir, off_t mediasize) { struct g_geom *gp; struct g_provider *pp2; struct g_consumer *cp; char name[64]; g_topology_assert(); if (!g_label_is_name_ok(label)) { G_LABEL_DEBUG(0, "%s contains suspicious label, skipping.", pp->name); G_LABEL_DEBUG(1, "%s suspicious label is: %s", pp->name, label); if (req != NULL) gctl_error(req, "Label name %s is invalid.", label); return (NULL); } gp = NULL; cp = NULL; if (snprintf(name, sizeof(name), "%s/%s", dir, label) >= sizeof(name)) { if (req != NULL) gctl_error(req, "Label name %s is too long.", label); return (NULL); } LIST_FOREACH(gp, &mp->geom, geom) { pp2 = LIST_FIRST(&gp->provider); if (pp2 == NULL) continue; if ((pp2->flags & G_PF_ORPHAN) != 0) continue; if (strcmp(pp2->name, name) == 0) { G_LABEL_DEBUG(1, "Label %s(%s) already exists (%s).", label, name, pp->name); if (req != NULL) { gctl_error(req, "Provider %s already exists.", name); } return (NULL); } } gp = g_slice_new(mp, 1, pp, &cp, NULL, 0, NULL); if (gp == NULL) { G_LABEL_DEBUG(0, "Cannot create slice %s.", label); if (req != NULL) gctl_error(req, "Cannot create slice %s.", label); return (NULL); } gp->orphan = g_label_orphan; gp->spoiled = g_label_spoiled; gp->resize = g_label_resize; g_access(cp, -1, 0, 0); g_slice_config(gp, 0, G_SLICE_CONFIG_SET, (off_t)0, mediasize, pp->sectorsize, "%s", name); G_LABEL_DEBUG(1, "Label for provider %s is %s.", pp->name, name); return (gp); } static int g_label_destroy(struct g_geom *gp, boolean_t force) { struct g_provider *pp; g_topology_assert(); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_LABEL_DEBUG(0, "Provider %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_LABEL_DEBUG(1, "Provider %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else if (pp != NULL) G_LABEL_DEBUG(1, "Label %s removed.", pp->name); g_slice_spoiled(LIST_FIRST(&gp->consumer)); return (0); } static int g_label_read_metadata(struct g_consumer *cp, struct g_label_metadata *md) { struct 
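g_label_create() above refuses label names that could escape the label directory (leading ../, embedded /../, trailing /..), and g_label_mangle_name() percent-encodes non-printable, blank and otherwise awkward characters before the label becomes part of a device name. A userland sketch of both checks; the kernel builds the mangled name with an sbuf, a fixed buffer is used here instead.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Reject path-traversal patterns, like g_label_is_name_ok(). */
static int
label_name_ok(const char *label)
{
	const char *s;

	if (strncmp(label, "../", 3) == 0)
		return (0);
	if (strstr(label, "/../") != NULL)
		return (0);
	if ((s = strstr(label, "/..")) != NULL && s[3] == '\0')
		return (0);
	return (1);
}

/* Percent-encode unsafe characters, like g_label_mangle_name(). */
static void
mangle_name(const char *in, char *out, size_t size)
{
	size_t o = 0;

	for (; *in != '\0' && o + 4 < size; in++) {
		unsigned char c = (unsigned char)*in;

		if (!isprint(c) || isspace(c) || c == '"' || c == '%')
			o += snprintf(out + o, size - o, "%%%02X", (unsigned)c);
		else
			out[o++] = (char)c;
	}
	out[o] = '\0';
}

int
main(void)
{
	char buf[64];

	printf("ok=%d\n", label_name_ok("../etc"));	/* 0: rejected */
	mangle_name("My Disk 100%", buf, sizeof(buf));
	printf("%s\n", buf);				/* My%20Disk%20100%25 */
	return (0);
}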
g_provider *pp; u_char *buf; int error; g_topology_assert(); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) return (error); /* Decode metadata. */ label_metadata_decode(buf, md); g_free(buf); return (0); } static void g_label_orphan_taste(struct g_consumer *cp __unused) { KASSERT(1 == 0, ("%s called?", __func__)); } static void g_label_start_taste(struct bio *bp __unused) { KASSERT(1 == 0, ("%s called?", __func__)); } static int g_label_access_taste(struct g_provider *pp __unused, int dr __unused, int dw __unused, int de __unused) { KASSERT(1 == 0, ("%s called", __func__)); return (EOPNOTSUPP); } static struct g_geom * g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_label_metadata md; struct g_consumer *cp; struct g_geom *gp; int i; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); G_LABEL_DEBUG(2, "Tasting %s.", pp->name); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); if (strcmp(pp->geom->class->name, mp->name) == 0) return (NULL); gp = g_new_geomf(mp, "label:taste"); gp->start = g_label_start_taste; gp->access = g_label_access_taste; gp->orphan = g_label_orphan_taste; cp = g_new_consumer(gp); g_attach(cp, pp); if (g_access(cp, 1, 0, 0) != 0) goto end; do { if (g_label_read_metadata(cp, &md) != 0) break; if (strcmp(md.md_magic, G_LABEL_MAGIC) != 0) break; if (md.md_version > G_LABEL_VERSION) { printf("geom_label.ko module is too old to handle %s.\n", pp->name); break; } /* * Backward compatibility: */ /* * There was no md_provsize field in earlier versions of * metadata. */ if (md.md_version < 2) md.md_provsize = pp->mediasize; if (md.md_provsize != pp->mediasize) break; g_label_create(NULL, mp, pp, md.md_label, G_LABEL_DIR, pp->mediasize - pp->sectorsize); } while (0); for (i = 0; g_labels[i] != NULL; i++) { char label[128]; if (g_labels[i]->ld_enabled == 0) continue; g_topology_unlock(); g_labels[i]->ld_taste(cp, label, sizeof(label)); g_label_mangle_name(label, sizeof(label)); g_topology_lock(); if (label[0] == '\0') continue; g_label_create(NULL, mp, pp, label, g_labels[i]->ld_dir, pp->mediasize); } g_access(cp, -1, 0, 0); end: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static void g_label_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; const char *name; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs != 2) { gctl_error(req, "Invalid number of arguments."); return; } /* * arg1 is the name of provider. */ name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", 1); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_LABEL_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } /* * arg0 is the label. 
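g_label_read_metadata() above reads the provider's last sector and decodes the label metadata stored there (the layout is spelled out by label_metadata_encode()/decode() in g_label.h below: magic at 0, version at 16, the 16-byte label at 20, provsize at 36). The equivalent against a file-backed image in plain C; the image path and the 512-byte sector size are assumptions, and get_le32 stands in for le32dec().

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static uint32_t
get_le32(const uint8_t *p)
{
	return ((uint32_t)p[0] | (uint32_t)p[1] << 8 |
	    (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24);
}

int
main(void)
{
	const off_t sectorsize = 512;		/* assumed sector size */
	uint8_t sector[512];
	char label[17];
	off_t mediasize;
	int fd;

	fd = open("labeled.img", O_RDONLY);	/* hypothetical image */
	if (fd == -1)
		return (1);
	mediasize = lseek(fd, 0, SEEK_END);
	/* The metadata lives in the last sector, as in g_label_read_metadata(). */
	if (pread(fd, sector, sizeof(sector), mediasize - sectorsize) !=
	    (ssize_t)sizeof(sector)) {
		close(fd);
		return (1);
	}
	if (memcmp(sector, "GEOM::LABEL", 12) == 0) {	/* magic plus its NUL */
		memcpy(label, sector + 20, 16);
		label[16] = '\0';
		printf("version %u, label \"%s\"\n", get_le32(sector + 16), label);
	}
	close(fd);
	return (0);
}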
*/ name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", 0); return; } g_label_create(req, mp, pp, name, G_LABEL_DIR, pp->mediasize); } static const char * g_label_skip_dir(const char *name) { char path[64]; u_int i; if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); if (strncmp(name, G_LABEL_DIR "/", strlen(G_LABEL_DIR "/")) == 0) name += strlen(G_LABEL_DIR "/"); for (i = 0; g_labels[i] != NULL; i++) { snprintf(path, sizeof(path), "%s/", g_labels[i]->ld_dir); if (strncmp(name, path, strlen(path)) == 0) { name += strlen(path); break; } } return (name); } static struct g_geom * g_label_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; struct g_provider *pp; const char *pname; name = g_label_skip_dir(name); LIST_FOREACH(gp, &mp->geom, geom) { pp = LIST_FIRST(&gp->provider); pname = g_label_skip_dir(pp->name); if (strcmp(pname, name) == 0) return (gp); } return (NULL); } static void g_label_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } gp = g_label_find_geom(mp, name); if (gp == NULL) { G_LABEL_DEBUG(1, "Label %s is invalid.", name); gctl_error(req, "Label %s is invalid.", name); return; } error = g_label_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy label %s (error=%d).", LIST_FIRST(&gp->provider)->name, error); return; } } } static void g_label_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_LABEL_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_label_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_label_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } DECLARE_GEOM_CLASS(g_label_class, g_label); MODULE_VERSION(geom_label, 0); Index: head/sys/geom/label/g_label.h =================================================================== --- head/sys/geom/label/g_label.h (revision 350693) +++ head/sys/geom/label/g_label.h (revision 350694) @@ -1,120 +1,112 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_LABEL_H_ #define _G_LABEL_H_ #include #ifdef _KERNEL #include #endif #define G_LABEL_CLASS_NAME "LABEL" #define G_LABEL_MAGIC "GEOM::LABEL" /* * Version history: * 1 - Initial version number. * 2 - Added md_provsize field to metadata. */ #define G_LABEL_VERSION 2 #define G_LABEL_DIR "label" #ifdef _KERNEL extern u_int g_label_debug; -#define G_LABEL_DEBUG(lvl, ...) do { \ - if (g_label_debug >= (lvl)) { \ - printf("GEOM_LABEL"); \ - if (g_label_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) +#define G_LABEL_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_LABEL", g_label_debug, (lvl), NULL, __VA_ARGS__) SYSCTL_DECL(_kern_geom_label); #define G_LABEL_INIT(kind, label, descr) \ SYSCTL_NODE(_kern_geom_label, OID_AUTO, kind, CTLFLAG_RD, \ NULL, ""); \ SYSCTL_INT(_kern_geom_label_##kind, OID_AUTO, enable, \ CTLFLAG_RWTUN, &label.ld_enabled, 1, descr) typedef void g_label_taste_t (struct g_consumer *cp, char *label, size_t size); struct g_label_desc { g_label_taste_t *ld_taste; char *ld_dir; int ld_enabled; }; /* Supported labels. */ extern struct g_label_desc g_label_ufs_id; extern struct g_label_desc g_label_ufs_volume; extern struct g_label_desc g_label_iso9660; extern struct g_label_desc g_label_msdosfs; extern struct g_label_desc g_label_ext2fs; extern struct g_label_desc g_label_reiserfs; extern struct g_label_desc g_label_ntfs; extern struct g_label_desc g_label_gpt; extern struct g_label_desc g_label_gpt_uuid; extern struct g_label_desc g_label_disk_ident; extern struct g_label_desc g_label_flashmap; extern void g_label_rtrim(char *label, size_t size); #endif /* _KERNEL */ struct g_label_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ char md_label[16]; /* Label. */ uint64_t md_provsize; /* Provider's size. 
*/ }; static __inline void label_metadata_encode(const struct g_label_metadata *md, u_char *data) { bcopy(md->md_magic, data, sizeof(md->md_magic)); le32enc(data + 16, md->md_version); bcopy(md->md_label, data + 20, sizeof(md->md_label)); le64enc(data + 36, md->md_provsize); } static __inline void label_metadata_decode(const u_char *data, struct g_label_metadata *md) { bcopy(data, md->md_magic, sizeof(md->md_magic)); md->md_version = le32dec(data + 16); bcopy(data + 20, md->md_label, sizeof(md->md_label)); md->md_provsize = le64dec(data + 36); } #endif /* _G_LABEL_H_ */ Index: head/sys/geom/label/g_label_ext2fs.c =================================================================== --- head/sys/geom/label/g_label_ext2fs.c (revision 350693) +++ head/sys/geom/label/g_label_ext2fs.c (revision 350694) @@ -1,103 +1,104 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Stanislav Sedov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #define EXT2FS_SB_OFFSET 1024 #define EXT2_SUPER_MAGIC 0xef53 #define EXT2_DYNAMIC_REV 1 typedef struct e2sb { uint8_t fake1[56]; uint16_t s_magic; uint8_t fake2[18]; uint32_t s_rev_level; uint8_t fake3[40]; char s_volume_name[16]; } e2sb_t; static void g_label_ext2fs_taste(struct g_consumer *cp, char *label, size_t size) { struct g_provider *pp; e2sb_t *fs; char *s_volume_name; g_topology_assert_not(); pp = cp->provider; label[0] = '\0'; if ((EXT2FS_SB_OFFSET % pp->sectorsize) != 0) return; fs = (e2sb_t *)g_read_data(cp, EXT2FS_SB_OFFSET, pp->sectorsize, NULL); if (fs == NULL) return; /* Check for magic and versio n*/ if (fs->s_magic == EXT2_SUPER_MAGIC && fs->s_rev_level == EXT2_DYNAMIC_REV) { G_LABEL_DEBUG(1, "ext2fs file system detected on %s.", pp->name); } else { goto exit_free; } s_volume_name = fs->s_volume_name; /* Terminate label */ s_volume_name[sizeof(fs->s_volume_name) - 1] = '\0'; if (s_volume_name[0] == '/') s_volume_name += 1; /* Check for volume label */ if (s_volume_name[0] == '\0') goto exit_free; strlcpy(label, s_volume_name, size); exit_free: g_free(fs); } struct g_label_desc g_label_ext2fs = { .ld_taste = g_label_ext2fs_taste, .ld_dir = "ext2fs", .ld_enabled = 1 }; G_LABEL_INIT(ext2fs, g_label_ext2fs, "Create device nodes for EXT2FS volumes"); Index: head/sys/geom/label/g_label_iso9660.c =================================================================== --- head/sys/geom/label/g_label_iso9660.c (revision 350693) +++ head/sys/geom/label/g_label_iso9660.c (revision 350694) @@ -1,81 +1,82 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
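The ext2 taste routine above reads one sector's worth of data at byte offset 1024 and, per the e2sb_t overlay, finds s_magic 56 bytes in, s_rev_level at 76 and the 16-byte s_volume_name at 120. The same probe against a file-backed image; the path is hypothetical, a fixed 1024-byte read replaces the sector-size handling, and the revision-level check is skipped for brevity.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define EXT2FS_SB_OFFSET	1024
#define EXT2_SUPER_MAGIC	0xef53

int
main(void)
{
	uint8_t sb[1024];
	char label[17];
	uint16_t magic;
	int fd;

	fd = open("ext2.img", O_RDONLY);	/* hypothetical image */
	if (fd == -1)
		return (1);
	if (pread(fd, sb, sizeof(sb), EXT2FS_SB_OFFSET) != (ssize_t)sizeof(sb)) {
		close(fd);
		return (1);
	}
	magic = (uint16_t)(sb[56] | sb[57] << 8);	/* s_magic, little-endian */
	if (magic == EXT2_SUPER_MAGIC) {
		memcpy(label, sb + 120, 16);		/* s_volume_name */
		label[16] = '\0';
		printf("ext2 label: %s\n", label[0] != '\0' ? label : "(none)");
	}
	close(fd);
	return (0);
}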
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #define G_LABEL_ISO9660_DIR "iso9660" #define ISO9660_MAGIC "\x01" "CD001" "\x01\x00" #define ISO9660_OFFSET 0x8000 #define VOLUME_LEN 32 static void g_label_iso9660_taste(struct g_consumer *cp, char *label, size_t size) { struct g_provider *pp; char *sector, *volume; g_topology_assert_not(); pp = cp->provider; label[0] = '\0'; if ((ISO9660_OFFSET % pp->sectorsize) != 0) return; sector = (char *)g_read_data(cp, ISO9660_OFFSET, pp->sectorsize, NULL); if (sector == NULL) return; if (bcmp(sector, ISO9660_MAGIC, sizeof(ISO9660_MAGIC) - 1) != 0) { g_free(sector); return; } G_LABEL_DEBUG(1, "ISO9660 file system detected on %s.", pp->name); volume = sector + 0x28; bzero(label, size); strlcpy(label, volume, MIN(size, VOLUME_LEN)); g_free(sector); g_label_rtrim(label, size); } struct g_label_desc g_label_iso9660 = { .ld_taste = g_label_iso9660_taste, .ld_dir = G_LABEL_ISO9660_DIR, .ld_enabled = 1 }; G_LABEL_INIT(iso9660, g_label_iso9660, "Create device nodes for ISO9660 volume names"); Index: head/sys/geom/label/g_label_msdosfs.c =================================================================== --- head/sys/geom/label/g_label_msdosfs.c (revision 350693) +++ head/sys/geom/label/g_label_msdosfs.c (revision 350694) @@ -1,219 +1,220 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Pawel Jakub Dawidek * Copyright (c) 2006 Tobias Reifenberger * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #include #define G_LABEL_MSDOSFS_DIR "msdosfs" #define LABEL_NO_NAME "NO NAME " static void g_label_msdosfs_taste(struct g_consumer *cp, char *label, size_t size) { struct g_provider *pp; FAT_BSBPB *pfat_bsbpb; FAT32_BSBPB *pfat32_bsbpb; FAT_DES *pfat_entry; uint8_t *sector0, *sector; g_topology_assert_not(); pp = cp->provider; sector0 = NULL; sector = NULL; bzero(label, size); /* Check if the sector size of the medium is a valid FAT sector size. 
*/ switch(pp->sectorsize) { case 512: case 1024: case 2048: case 4096: break; default: G_LABEL_DEBUG(1, "MSDOSFS: %s: sector size %d not compatible.", pp->name, pp->sectorsize); return; } /* Load 1st sector with boot sector and boot parameter block. */ sector0 = (uint8_t *)g_read_data(cp, 0, pp->sectorsize, NULL); if (sector0 == NULL) return; /* Check for the FAT boot sector signature. */ if (sector0[510] != 0x55 || sector0[511] != 0xaa) { G_LABEL_DEBUG(1, "MSDOSFS: %s: no FAT signature found.", pp->name); goto error; } /* * Test if this is really a FAT volume and determine the FAT type. */ pfat_bsbpb = (FAT_BSBPB *)sector0; pfat32_bsbpb = (FAT32_BSBPB *)sector0; if (UINT16BYTES(pfat_bsbpb->BPB_FATSz16) != 0) { /* * If the BPB_FATSz16 field is not zero and the string "FAT" is * at the right place, this should be a FAT12 or FAT16 volume. */ if (strncmp(pfat_bsbpb->BS_FilSysType, "FAT", 3) != 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT12/16 volume not valid.", pp->name); goto error; } G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT12/FAT16 volume detected.", pp->name); /* A volume with no name should have "NO NAME " as label. */ if (strncmp(pfat_bsbpb->BS_VolLab, LABEL_NO_NAME, sizeof(pfat_bsbpb->BS_VolLab)) == 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT12/16 volume has no name.", pp->name); goto error; } strlcpy(label, pfat_bsbpb->BS_VolLab, MIN(size, sizeof(pfat_bsbpb->BS_VolLab) + 1)); } else if (UINT32BYTES(pfat32_bsbpb->BPB_FATSz32) != 0) { uint32_t fat_FirstDataSector, fat_BytesPerSector, offset; /* * If the BPB_FATSz32 field is not zero and the string "FAT" is * at the right place, this should be a FAT32 volume. */ if (strncmp(pfat32_bsbpb->BS_FilSysType, "FAT", 3) != 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT32 volume not valid.", pp->name); goto error; } G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT32 volume detected.", pp->name); /* * If the volume label is not "NO NAME " we're done. */ if (strncmp(pfat32_bsbpb->BS_VolLab, LABEL_NO_NAME, sizeof(pfat32_bsbpb->BS_VolLab)) != 0) { strlcpy(label, pfat32_bsbpb->BS_VolLab, MIN(size, sizeof(pfat32_bsbpb->BS_VolLab) + 1)); goto endofchecks; } /* * If the volume label "NO NAME " is in the boot sector, the * label of FAT32 volumes may be stored as a special entry in * the root directory. */ fat_FirstDataSector = UINT16BYTES(pfat32_bsbpb->BPB_RsvdSecCnt) + (pfat32_bsbpb->BPB_NumFATs * UINT32BYTES(pfat32_bsbpb->BPB_FATSz32)); fat_BytesPerSector = UINT16BYTES(pfat32_bsbpb->BPB_BytsPerSec); G_LABEL_DEBUG(2, "MSDOSFS: FAT_FirstDataSector=0x%x, FAT_BytesPerSector=%d", fat_FirstDataSector, fat_BytesPerSector); for (offset = fat_BytesPerSector * fat_FirstDataSector;; offset += fat_BytesPerSector) { sector = (uint8_t *)g_read_data(cp, offset, fat_BytesPerSector, NULL); if (sector == NULL) goto error; pfat_entry = (FAT_DES *)sector; do { /* No more entries available. */ if (pfat_entry->DIR_Name[0] == 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: " "FAT32 volume has no name.", pp->name); goto error; } /* Skip empty or long name entries. */ if (pfat_entry->DIR_Name[0] == 0xe5 || (pfat_entry->DIR_Attr & FAT_DES_ATTR_LONG_NAME) == FAT_DES_ATTR_LONG_NAME) { continue; } /* * The name of the entry is the volume label if * ATTR_VOLUME_ID is set. 
*/ if (pfat_entry->DIR_Attr & FAT_DES_ATTR_VOLUME_ID) { strlcpy(label, pfat_entry->DIR_Name, MIN(size, sizeof(pfat_entry->DIR_Name) + 1)); goto endofchecks; } } while((uint8_t *)(++pfat_entry) < (uint8_t *)(sector + fat_BytesPerSector)); g_free(sector); } } else { G_LABEL_DEBUG(1, "MSDOSFS: %s: no FAT volume detected.", pp->name); goto error; } endofchecks: g_label_rtrim(label, size); error: if (sector0 != NULL) g_free(sector0); if (sector != NULL) g_free(sector); } struct g_label_desc g_label_msdosfs = { .ld_taste = g_label_msdosfs_taste, .ld_dir = G_LABEL_MSDOSFS_DIR, .ld_enabled = 1 }; G_LABEL_INIT(msdosfs, g_label_msdosfs, "Create device nodes for MSDOSFS volumes"); Index: head/sys/geom/label/g_label_reiserfs.c =================================================================== --- head/sys/geom/label/g_label_reiserfs.c (revision 350693) +++ head/sys/geom/label/g_label_reiserfs.c (revision 350694) @@ -1,122 +1,123 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Stanislav Sedov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
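For FAT32 the volume label may live in the root directory rather than the BPB, so the taste code above computes the first data sector as BPB_RsvdSecCnt + BPB_NumFATs * BPB_FATSz32 and turns it into a byte offset with BPB_BytsPerSec before scanning directory entries for one with ATTR_VOLUME_ID set. A worked example of just that offset arithmetic; the BPB values are invented and only illustrate the formula.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Illustrative FAT32 BPB fields. */
	uint32_t rsvd_sec_cnt = 32;	/* BPB_RsvdSecCnt */
	uint32_t num_fats = 2;		/* BPB_NumFATs */
	uint32_t fat_sz32 = 1024;	/* BPB_FATSz32, sectors per FAT */
	uint32_t byts_per_sec = 512;	/* BPB_BytsPerSec */

	/* Same formula as the taste routine above. */
	uint32_t first_data_sector = rsvd_sec_cnt + num_fats * fat_sz32;
	uint64_t byte_offset = (uint64_t)first_data_sector * byts_per_sec;

	printf("first data sector %u, directory scan starts at byte offset %ju\n",
	    first_data_sector, (uintmax_t)byte_offset);
	return (0);
}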
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #define REISERFS_NEW_DISK_OFFSET 64 * 1024 #define REISERFS_OLD_DISK_OFFSET 8 * 1024 #define REISERFS_SUPER_MAGIC "ReIsEr" typedef struct reiserfs_sb { uint8_t fake1[52]; char s_magic[10]; uint8_t fake2[10]; uint16_t s_version; uint8_t fake3[26]; char s_volume_name[16]; } reiserfs_sb_t; static reiserfs_sb_t * g_label_reiserfs_read_super(struct g_consumer *cp, off_t offset) { reiserfs_sb_t *fs; u_int secsize; secsize = cp->provider->sectorsize; if ((offset % secsize) != 0) return (NULL); fs = (reiserfs_sb_t *)g_read_data(cp, offset, secsize, NULL); if (fs == NULL) return (NULL); if (strncmp(fs->s_magic, REISERFS_SUPER_MAGIC, strlen(REISERFS_SUPER_MAGIC)) != 0) { g_free(fs); return (NULL); } return (fs); } static void g_label_reiserfs_taste(struct g_consumer *cp, char *label, size_t size) { struct g_provider *pp; reiserfs_sb_t *fs; g_topology_assert_not(); pp = cp->provider; label[0] = '\0'; /* Try old format */ fs = g_label_reiserfs_read_super(cp, REISERFS_OLD_DISK_OFFSET); if (fs == NULL) { /* Try new format */ fs = g_label_reiserfs_read_super(cp, REISERFS_NEW_DISK_OFFSET); } if (fs == NULL) return; /* Check version */ if (fs->s_version == 2) { G_LABEL_DEBUG(1, "reiserfs file system detected on %s.", pp->name); } else { goto exit_free; } /* Check for volume label */ if (fs->s_volume_name[0] == '\0') goto exit_free; /* Terminate label */ fs->s_volume_name[sizeof(fs->s_volume_name) - 1] = '\0'; strlcpy(label, fs->s_volume_name, size); exit_free: g_free(fs); } struct g_label_desc g_label_reiserfs = { .ld_taste = g_label_reiserfs_taste, .ld_dir = "reiserfs", .ld_enabled = 1 }; G_LABEL_INIT(reiserfs, g_label_reiserfs, "Create device nodes for REISERFS volumes"); Index: head/sys/geom/label/g_label_ufs.c =================================================================== --- head/sys/geom/label/g_label_ufs.c (revision 350693) +++ head/sys/geom/label/g_label_ufs.c (revision 350694) @@ -1,156 +1,157 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002, 2003 Gordon Tetlow * Copyright (c) 2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include +#include #include #define G_LABEL_UFS_VOLUME_DIR "ufs" #define G_LABEL_UFS_ID_DIR "ufsid" #define G_LABEL_UFS_VOLUME 0 #define G_LABEL_UFS_ID 1 /* * G_LABEL_UFS_CMP returns true if difference between provider mediasize * and filesystem size is less than G_LABEL_UFS_MAXDIFF sectors */ #define G_LABEL_UFS_CMP(prov, fsys, size) \ ( abs( ((fsys)->size) - ( (prov)->mediasize / (fsys)->fs_fsize )) \ < G_LABEL_UFS_MAXDIFF ) #define G_LABEL_UFS_MAXDIFF 0x100 /* * Try to find a superblock on the provider. If successful, then * check that the size in the superblock corresponds to the size * of the underlying provider. Finally, look for a volume label * and create an appropriate provider based on that. */ static void g_label_ufs_taste_common(struct g_consumer *cp, char *label, size_t size, int what) { struct g_provider *pp; struct fs *fs; g_topology_assert_not(); pp = cp->provider; label[0] = '\0'; fs = NULL; if (SBLOCKSIZE % pp->sectorsize != 0 || ffs_sbget(cp, &fs, STDSB_NOHASHFAIL, M_GEOM, g_use_g_read_data) != 0) { KASSERT(fs == NULL, ("g_label_ufs_taste_common: non-NULL fs %p\n", fs)); return; } /* * Check for magic. We also need to check if file system size * is almost equal to providers size, because sysinstall(8) * used to bogusly put first partition at offset 0 * instead of 16, and glabel/ufs would find file system on slice * instead of partition. * * In addition, media size can be a bit bigger than file system * size. For instance, mkuzip can append bytes to align data * to large sector size (it improves compression rates). */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_fsize > 0 && ( G_LABEL_UFS_CMP(pp, fs, fs_old_size) || G_LABEL_UFS_CMP(pp, fs, fs_providersize))) { /* Valid UFS1. */ } else if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_fsize > 0 && ( G_LABEL_UFS_CMP(pp, fs, fs_size) || G_LABEL_UFS_CMP(pp, fs, fs_providersize))) { /* Valid UFS2. */ } else { goto out; } G_LABEL_DEBUG(1, "%s file system detected on %s.", fs->fs_magic == FS_UFS1_MAGIC ? 
"UFS1" : "UFS2", pp->name); switch (what) { case G_LABEL_UFS_VOLUME: /* Check for volume label */ if (fs->fs_volname[0] != '\0') strlcpy(label, fs->fs_volname, size); break; case G_LABEL_UFS_ID: if (fs->fs_id[0] != 0 || fs->fs_id[1] != 0) snprintf(label, size, "%08x%08x", fs->fs_id[0], fs->fs_id[1]); break; } out: g_free(fs->fs_csp); g_free(fs); } static void g_label_ufs_volume_taste(struct g_consumer *cp, char *label, size_t size) { g_label_ufs_taste_common(cp, label, size, G_LABEL_UFS_VOLUME); } static void g_label_ufs_id_taste(struct g_consumer *cp, char *label, size_t size) { g_label_ufs_taste_common(cp, label, size, G_LABEL_UFS_ID); } struct g_label_desc g_label_ufs_volume = { .ld_taste = g_label_ufs_volume_taste, .ld_dir = G_LABEL_UFS_VOLUME_DIR, .ld_enabled = 1 }; struct g_label_desc g_label_ufs_id = { .ld_taste = g_label_ufs_id_taste, .ld_dir = G_LABEL_UFS_ID_DIR, .ld_enabled = 1 }; G_LABEL_INIT(ufsid, g_label_ufs_id, "Create device nodes for UFS file system IDs"); G_LABEL_INIT(ufs, g_label_ufs_volume, "Create device nodes for UFS volume names"); MODULE_DEPEND(g_label, ufs, 1, 1, 1); Index: head/sys/geom/linux_lvm/g_linux_lvm.c =================================================================== --- head/sys/geom/linux_lvm/g_linux_lvm.c (revision 350693) +++ head/sys/geom/linux_lvm/g_linux_lvm.c (revision 350694) @@ -1,1193 +1,1194 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008 Andrew Thompson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include +#include #include #include FEATURE(geom_linux_lvm, "GEOM Linux LVM partitioning support"); /* Declare malloc(9) label */ static MALLOC_DEFINE(M_GLLVM, "gllvm", "GEOM_LINUX_LVM Data"); /* GEOM class methods */ static g_access_t g_llvm_access; static g_init_t g_llvm_init; static g_orphan_t g_llvm_orphan; static g_orphan_t g_llvm_taste_orphan; static g_start_t g_llvm_start; static g_taste_t g_llvm_taste; static g_ctl_destroy_geom_t g_llvm_destroy_geom; static void g_llvm_done(struct bio *); static void g_llvm_remove_disk(struct g_llvm_vg *, struct g_consumer *); static int g_llvm_activate_lv(struct g_llvm_vg *, struct g_llvm_lv *); static int g_llvm_add_disk(struct g_llvm_vg *, struct g_provider *, char *); static void g_llvm_free_vg(struct g_llvm_vg *); static int g_llvm_destroy(struct g_llvm_vg *, int); static int g_llvm_read_label(struct g_consumer *, struct g_llvm_label *); static int g_llvm_read_md(struct g_consumer *, struct g_llvm_metadata *, struct g_llvm_label *); static int llvm_label_decode(const u_char *, struct g_llvm_label *, int); static int llvm_md_decode(const u_char *, struct g_llvm_metadata *, struct g_llvm_label *); static int llvm_textconf_decode(u_char *, int, struct g_llvm_metadata *); static int llvm_textconf_decode_pv(char **, char *, struct g_llvm_vg *); static int llvm_textconf_decode_lv(char **, char *, struct g_llvm_vg *); static int llvm_textconf_decode_sg(char **, char *, struct g_llvm_lv *); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, linux_lvm, CTLFLAG_RW, 0, "GEOM_LINUX_LVM stuff"); static u_int g_llvm_debug = 0; SYSCTL_UINT(_kern_geom_linux_lvm, OID_AUTO, debug, CTLFLAG_RWTUN, &g_llvm_debug, 0, "Debug level"); LIST_HEAD(, g_llvm_vg) vg_list; /* * Called to notify geom when it's been opened, and for what intent */ static int g_llvm_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *c; struct g_llvm_vg *vg; struct g_geom *gp; int error; KASSERT(pp != NULL, ("%s: NULL provider", __func__)); gp = pp->geom; KASSERT(gp != NULL, ("%s: NULL geom", __func__)); vg = gp->softc; if (vg == NULL) { /* It seems that .access can be called with negative dr,dw,dx * in this case but I want to check for myself */ G_LLVM_DEBUG(0, "access(%d, %d, %d) for %s", dr, dw, de, pp->name); /* This should only happen when geom is withered so * allow only negative requests */ KASSERT(dr <= 0 && dw <= 0 && de <= 0, ("%s: Positive access for %s", __func__, pp->name)); if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) G_LLVM_DEBUG(0, "Device %s definitely destroyed", pp->name); return (0); } /* Grab an exclusive bit to propagate on our consumers on first open */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... 
drop it on close */ if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) de--; error = ENXIO; LIST_FOREACH(c, &gp->consumer, consumer) { KASSERT(c != NULL, ("%s: consumer is NULL", __func__)); error = g_access(c, dr, dw, de); if (error != 0) { struct g_consumer *c2; /* Backout earlier changes */ LIST_FOREACH(c2, &gp->consumer, consumer) { if (c2 == c) /* all eariler components fixed */ return (error); g_access(c2, -dr, -dw, -de); } } } return (error); } /* * Dismantle bio_queue and destroy its components */ static void bioq_dismantle(struct bio_queue_head *bq) { struct bio *b; for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) { bioq_remove(bq, b); g_destroy_bio(b); } } /* * GEOM .done handler * Can't use standard handler because one requested IO may * fork into additional data IOs */ static void g_llvm_done(struct bio *b) { struct bio *parent_b; parent_b = b->bio_parent; if (b->bio_error != 0) { G_LLVM_DEBUG(0, "Error %d for offset=%ju, length=%ju on %s", b->bio_error, b->bio_offset, b->bio_length, b->bio_to->name); if (parent_b->bio_error == 0) parent_b->bio_error = b->bio_error; } parent_b->bio_inbed++; parent_b->bio_completed += b->bio_completed; if (parent_b->bio_children == parent_b->bio_inbed) { parent_b->bio_completed = parent_b->bio_length; g_io_deliver(parent_b, parent_b->bio_error); } g_destroy_bio(b); } static void g_llvm_start(struct bio *bp) { struct g_provider *pp; struct g_llvm_vg *vg; struct g_llvm_pv *pv; struct g_llvm_lv *lv; struct g_llvm_segment *sg; struct bio *cb; struct bio_queue_head bq; size_t chunk_size; off_t offset, length; char *addr; u_int count; pp = bp->bio_to; lv = pp->private; vg = pp->geom->softc; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: /* XXX BIO_GETATTR allowed? */ break; default: g_io_deliver(bp, EOPNOTSUPP); return; } bioq_init(&bq); chunk_size = vg->vg_extentsize; addr = bp->bio_data; offset = bp->bio_offset; /* virtual offset and length */ length = bp->bio_length; while (length > 0) { size_t chunk_index, in_chunk_offset, in_chunk_length; pv = NULL; cb = g_clone_bio(bp); if (cb == NULL) { bioq_dismantle(&bq); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } /* get the segment and the pv */ if (lv->lv_sgcount == 1) { /* skip much of the calculations for a single sg */ chunk_index = 0; in_chunk_offset = 0; in_chunk_length = length; sg = lv->lv_firstsg; pv = sg->sg_pv; cb->bio_offset = offset + sg->sg_pvoffset; } else { chunk_index = offset / chunk_size; /* round downwards */ in_chunk_offset = offset % chunk_size; in_chunk_length = min(length, chunk_size - in_chunk_offset); /* XXX could be faster */ LIST_FOREACH(sg, &lv->lv_segs, sg_next) { if (chunk_index >= sg->sg_start && chunk_index <= sg->sg_end) { /* adjust chunk index for sg start */ chunk_index -= sg->sg_start; pv = sg->sg_pv; break; } } cb->bio_offset = (off_t)chunk_index * (off_t)chunk_size + in_chunk_offset + sg->sg_pvoffset; } KASSERT(pv != NULL, ("Can't find PV for chunk %zu", chunk_index)); cb->bio_to = pv->pv_gprov; cb->bio_done = g_llvm_done; cb->bio_length = in_chunk_length; cb->bio_data = addr; cb->bio_caller1 = pv; bioq_disksort(&bq, cb); G_LLVM_DEBUG(5, "Mapped %s(%ju, %ju) on %s to %zu(%zu,%zu) @ %s:%ju", bp->bio_cmd == BIO_READ ? 
"R" : "W", offset, length, lv->lv_name, chunk_index, in_chunk_offset, in_chunk_length, pv->pv_name, cb->bio_offset); addr += in_chunk_length; length -= in_chunk_length; offset += in_chunk_length; } /* Fire off bio's here */ count = 0; for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) { bioq_remove(&bq, cb); pv = cb->bio_caller1; cb->bio_caller1 = NULL; G_LLVM_DEBUG(6, "firing bio to %s, offset=%ju, length=%ju", cb->bio_to->name, cb->bio_offset, cb->bio_length); g_io_request(cb, pv->pv_gcons); count++; } if (count == 0) { /* We handled everything locally */ bp->bio_completed = bp->bio_length; g_io_deliver(bp, 0); } } static void g_llvm_remove_disk(struct g_llvm_vg *vg, struct g_consumer *cp) { struct g_llvm_pv *pv; struct g_llvm_lv *lv; struct g_llvm_segment *sg; int found; KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); pv = (struct g_llvm_pv *)cp->private; G_LLVM_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, pv->pv_name); LIST_FOREACH(lv, &vg->vg_lvs, lv_next) { /* Find segments that map to this disk */ found = 0; LIST_FOREACH(sg, &lv->lv_segs, sg_next) { if (sg->sg_pv == pv) { sg->sg_pv = NULL; lv->lv_sgactive--; found = 1; break; } } if (found) { G_LLVM_DEBUG(0, "Device %s removed.", lv->lv_gprov->name); g_wither_provider(lv->lv_gprov, ENXIO); lv->lv_gprov = NULL; } } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); g_destroy_consumer(cp); } static void g_llvm_orphan(struct g_consumer *cp) { struct g_llvm_vg *vg; struct g_geom *gp; g_topology_assert(); gp = cp->geom; vg = gp->softc; if (vg == NULL) return; g_llvm_remove_disk(vg, cp); g_llvm_destroy(vg, 1); } static int g_llvm_activate_lv(struct g_llvm_vg *vg, struct g_llvm_lv *lv) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); KASSERT(lv->lv_sgactive == lv->lv_sgcount, ("segment missing")); gp = vg->vg_geom; pp = g_new_providerf(gp, "linux_lvm/%s-%s", vg->vg_name, lv->lv_name); pp->mediasize = vg->vg_extentsize * (off_t)lv->lv_extentcount; pp->sectorsize = vg->vg_sectorsize; g_error_provider(pp, 0); lv->lv_gprov = pp; pp->private = lv; G_LLVM_DEBUG(1, "Created %s, %juM", pp->name, pp->mediasize / (1024*1024)); return (0); } static int g_llvm_add_disk(struct g_llvm_vg *vg, struct g_provider *pp, char *uuid) { struct g_geom *gp; struct g_consumer *cp, *fcp; struct g_llvm_pv *pv; struct g_llvm_lv *lv; struct g_llvm_segment *sg; int error; g_topology_assert(); LIST_FOREACH(pv, &vg->vg_pvs, pv_next) { if (strcmp(pv->pv_uuid, uuid) == 0) break; /* found it */ } if (pv == NULL) { G_LLVM_DEBUG(3, "uuid %s not found in pv list", uuid); return (ENOENT); } if (pv->pv_gprov != NULL) { G_LLVM_DEBUG(0, "disk %s already initialised in %s", pv->pv_name, vg->vg_name); return (EEXIST); } pv->pv_start *= vg->vg_sectorsize; gp = vg->vg_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); error = g_attach(cp, pp); G_LLVM_DEBUG(1, "Attached %s to %s at offset %ju", pp->name, pv->pv_name, pv->pv_start); if (error != 0) { G_LLVM_DEBUG(0, "cannot attach %s to %s", pp->name, vg->vg_name); g_destroy_consumer(cp); return (error); } if (fcp != NULL) { if (fcp->provider->sectorsize != pp->sectorsize) { G_LLVM_DEBUG(0, "Provider %s of %s has invalid " "sector size (%d)", pp->name, vg->vg_name, pp->sectorsize); return (EINVAL); } if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) { /* Replicate access permissions from first "live" * consumer to the new one */ error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); 
g_destroy_consumer(cp); return (error); } } } cp->private = pv; pv->pv_gcons = cp; pv->pv_gprov = pp; LIST_FOREACH(lv, &vg->vg_lvs, lv_next) { /* Find segments that map to this disk */ LIST_FOREACH(sg, &lv->lv_segs, sg_next) { if (strcmp(sg->sg_pvname, pv->pv_name) == 0) { /* activate the segment */ KASSERT(sg->sg_pv == NULL, ("segment already mapped")); sg->sg_pvoffset = (off_t)sg->sg_pvstart * vg->vg_extentsize + pv->pv_start; sg->sg_pv = pv; lv->lv_sgactive++; G_LLVM_DEBUG(2, "%s: %d to %d @ %s:%d" " offset %ju sector %ju", lv->lv_name, sg->sg_start, sg->sg_end, sg->sg_pvname, sg->sg_pvstart, sg->sg_pvoffset, sg->sg_pvoffset / vg->vg_sectorsize); } } /* Activate any lvs waiting on this disk */ if (lv->lv_gprov == NULL && lv->lv_sgactive == lv->lv_sgcount) { error = g_llvm_activate_lv(vg, lv); if (error) break; } } return (error); } static void g_llvm_init(struct g_class *mp) { LIST_INIT(&vg_list); } static void g_llvm_free_vg(struct g_llvm_vg *vg) { struct g_llvm_pv *pv; struct g_llvm_lv *lv; struct g_llvm_segment *sg; /* Free all the structures */ while ((pv = LIST_FIRST(&vg->vg_pvs)) != NULL) { LIST_REMOVE(pv, pv_next); free(pv, M_GLLVM); } while ((lv = LIST_FIRST(&vg->vg_lvs)) != NULL) { while ((sg = LIST_FIRST(&lv->lv_segs)) != NULL) { LIST_REMOVE(sg, sg_next); free(sg, M_GLLVM); } LIST_REMOVE(lv, lv_next); free(lv, M_GLLVM); } LIST_REMOVE(vg, vg_next); free(vg, M_GLLVM); } static void g_llvm_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_consumer *cp; struct g_geom *gp; struct g_llvm_label ll; struct g_llvm_metadata md; struct g_llvm_vg *vg; int error; bzero(&md, sizeof(md)); g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); gp = g_new_geomf(mp, "linux_lvm:taste"); /* This orphan function should never be called.
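	 * The temporary taste geom attaches its consumer only long enough to
	 * read the LVM label and metadata, and detaches and destroys it again
	 * before this function returns, so no orphan event can reach it.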
*/ gp->orphan = g_llvm_taste_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_llvm_read_label(cp, &ll); if (!error) error = g_llvm_read_md(cp, &md, &ll); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); vg = md.md_vg; if (vg->vg_geom == NULL) { /* new volume group */ gp = g_new_geomf(mp, "%s", vg->vg_name); gp->start = g_llvm_start; gp->spoiled = g_llvm_orphan; gp->orphan = g_llvm_orphan; gp->access = g_llvm_access; vg->vg_sectorsize = pp->sectorsize; vg->vg_extentsize *= vg->vg_sectorsize; vg->vg_geom = gp; gp->softc = vg; G_LLVM_DEBUG(1, "Created volume %s, extent size %zuK", vg->vg_name, vg->vg_extentsize / 1024); } /* initialise this disk in the volume group */ g_llvm_add_disk(vg, pp, ll.ll_uuid); return (vg->vg_geom); } static int g_llvm_destroy(struct g_llvm_vg *vg, int force) { struct g_provider *pp; struct g_geom *gp; g_topology_assert(); if (vg == NULL) return (ENXIO); gp = vg->vg_geom; LIST_FOREACH(pp, &gp->provider, provider) { if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) { G_LLVM_DEBUG(1, "Device %s is still open (r%dw%de%d)", pp->name, pp->acr, pp->acw, pp->ace); if (!force) return (EBUSY); } } g_llvm_free_vg(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static int g_llvm_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_llvm_vg *vg; vg = gp->softc; return (g_llvm_destroy(vg, 0)); } int g_llvm_read_label(struct g_consumer *cp, struct g_llvm_label *ll) { struct g_provider *pp; u_char *buf; int i, error = 0; g_topology_assert(); /* The LVM label is stored on the first four sectors */ error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, 0, pp->sectorsize * 4, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_LLVM_DEBUG(1, "Cannot read metadata from %s (error=%d)", pp->name, error); return (error); } /* Search the four sectors for the LVM label. 
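	 * llvm_label_decode() accepts a candidate sector only if it carries the
	 * LABELONE magic, the LVM2 text-format signature, and a recorded sector
	 * number that matches the sector it was actually read from.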
*/ for (i = 0; i < 4; i++) { error = llvm_label_decode(&buf[i * pp->sectorsize], ll, i); if (error == 0) break; /* found it */ } g_free(buf); return (error); } int g_llvm_read_md(struct g_consumer *cp, struct g_llvm_metadata *md, struct g_llvm_label *ll) { struct g_provider *pp; u_char *buf; int error; int size; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, ll->ll_md_offset, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_LLVM_DEBUG(0, "Cannot read metadata from %s (error=%d)", cp->provider->name, error); return (error); } error = llvm_md_decode(buf, md, ll); g_free(buf); if (error != 0) { return (error); } G_LLVM_DEBUG(1, "reading LVM2 config @ %s:%ju", pp->name, ll->ll_md_offset + md->md_reloffset); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); /* round up to the nearest sector */ size = md->md_relsize + (pp->sectorsize - md->md_relsize % pp->sectorsize); buf = g_read_data(cp, ll->ll_md_offset + md->md_reloffset, size, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_LLVM_DEBUG(0, "Cannot read LVM2 config from %s (error=%d)", pp->name, error); return (error); } buf[md->md_relsize] = '\0'; G_LLVM_DEBUG(10, "LVM config:\n%s\n", buf); error = llvm_textconf_decode(buf, md->md_relsize, md); g_free(buf); return (error); } static int llvm_label_decode(const u_char *data, struct g_llvm_label *ll, int sector) { uint64_t off; char *uuid; /* Magic string */ if (bcmp("LABELONE", data , 8) != 0) return (EINVAL); /* We only support LVM2 text format */ if (bcmp("LVM2 001", data + 24, 8) != 0) { G_LLVM_DEBUG(0, "Unsupported LVM format"); return (EINVAL); } ll->ll_sector = le64dec(data + 8); ll->ll_crc = le32dec(data + 16); ll->ll_offset = le32dec(data + 20); if (ll->ll_sector != sector) { G_LLVM_DEBUG(0, "Expected sector %ju, found at %d", ll->ll_sector, sector); return (EINVAL); } off = ll->ll_offset; /* * convert the binary uuid to string format, the format is * xxxxxx-xxxx-xxxx-xxxx-xxxx-xxxx-xxxxxx (6-4-4-4-4-4-6) */ uuid = ll->ll_uuid; bcopy(data + off, uuid, 6); off += 6; uuid += 6; *uuid++ = '-'; for (int i = 0; i < 5; i++) { bcopy(data + off, uuid, 4); off += 4; uuid += 4; *uuid++ = '-'; } bcopy(data + off, uuid, 6); off += 6; uuid += 6; *uuid++ = '\0'; ll->ll_size = le64dec(data + off); off += 8; ll->ll_pestart = le64dec(data + off); off += 16; /* Only one data section is supported */ if (le64dec(data + off) != 0) { G_LLVM_DEBUG(0, "Only one data section supported"); return (EINVAL); } off += 16; ll->ll_md_offset = le64dec(data + off); off += 8; ll->ll_md_size = le64dec(data + off); off += 8; G_LLVM_DEBUG(1, "LVM metadata: offset=%ju, size=%ju", ll->ll_md_offset, ll->ll_md_size); /* Only one data section is supported */ if (le64dec(data + off) != 0) { G_LLVM_DEBUG(0, "Only one metadata section supported"); return (EINVAL); } G_LLVM_DEBUG(2, "label uuid=%s", ll->ll_uuid); G_LLVM_DEBUG(2, "sector=%ju, crc=%u, offset=%u, size=%ju, pestart=%ju", ll->ll_sector, ll->ll_crc, ll->ll_offset, ll->ll_size, ll->ll_pestart); return (0); } static int llvm_md_decode(const u_char *data, struct g_llvm_metadata *md, struct g_llvm_label *ll) { uint64_t off; char magic[16]; off = 0; md->md_csum = le32dec(data + off); off += 4; bcopy(data + off, magic, 16); off += 16; md->md_version = le32dec(data + off); off += 4; md->md_start = le64dec(data + off); off += 8; md->md_size = le64dec(data + off); 
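	/*
	 * Layout decoded so far (little-endian, offsets from the start of the
	 * metadata area): md_csum at 0 (32 bit), the 16-byte magic at 4,
	 * md_version at 20 (32 bit), md_start at 24 and md_size at 32 (64 bit
	 * each).  The md_reloffset/md_relsize pair locating the text
	 * configuration follows at offset 40 and is decoded below.
	 */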
off += 8; if (bcmp(G_LLVM_MAGIC, magic, 16) != 0) { G_LLVM_DEBUG(0, "Incorrect md magic number"); return (EINVAL); } if (md->md_version != 1) { G_LLVM_DEBUG(0, "Incorrect md version number (%u)", md->md_version); return (EINVAL); } if (md->md_start != ll->ll_md_offset) { G_LLVM_DEBUG(0, "Incorrect md offset (%ju)", md->md_start); return (EINVAL); } /* Aparently only one is ever returned */ md->md_reloffset = le64dec(data + off); off += 8; md->md_relsize = le64dec(data + off); off += 16; /* XXX skipped checksum */ if (le64dec(data + off) != 0) { G_LLVM_DEBUG(0, "Only one reloc supported"); return (EINVAL); } G_LLVM_DEBUG(3, "reloc: offset=%ju, size=%ju", md->md_reloffset, md->md_relsize); G_LLVM_DEBUG(3, "md: version=%u, start=%ju, size=%ju", md->md_version, md->md_start, md->md_size); return (0); } #define GRAB_INT(key, tok1, tok2, v) \ if (tok1 && tok2 && strncmp(tok1, key, sizeof(key)) == 0) { \ v = strtol(tok2, &tok1, 10); \ if (tok1 == tok2) \ /* strtol did not eat any of the buffer */ \ goto bad; \ continue; \ } #define GRAB_STR(key, tok1, tok2, v, len) \ if (tok1 && tok2 && strncmp(tok1, key, sizeof(key)) == 0) { \ strsep(&tok2, "\""); \ if (tok2 == NULL) \ continue; \ tok1 = strsep(&tok2, "\""); \ if (tok2 == NULL) \ continue; \ strncpy(v, tok1, len); \ continue; \ } #define SPLIT(key, value, str) \ key = strsep(&value, str); \ /* strip trailing whitespace on the key */ \ for (char *t = key; *t != '\0'; t++) \ if (isspace(*t)) { \ *t = '\0'; \ break; \ } static size_t llvm_grab_name(char *name, const char *tok) { size_t len; len = 0; if (tok == NULL) return (0); if (tok[0] == '-') return (0); if (strcmp(tok, ".") == 0 || strcmp(tok, "..") == 0) return (0); while (tok[len] && (isalpha(tok[len]) || isdigit(tok[len]) || tok[len] == '.' || tok[len] == '_' || tok[len] == '-' || tok[len] == '+') && len < G_LLVM_NAMELEN - 1) len++; bcopy(tok, name, len); name[len] = '\0'; return (len); } static int llvm_textconf_decode(u_char *data, int buflen, struct g_llvm_metadata *md) { struct g_llvm_vg *vg; char *buf = data; char *tok, *v; char name[G_LLVM_NAMELEN]; char uuid[G_LLVM_UUIDLEN]; size_t len; if (buf == NULL || *buf == '\0') return (EINVAL); tok = strsep(&buf, "\n"); if (tok == NULL) return (EINVAL); len = llvm_grab_name(name, tok); if (len == 0) return (EINVAL); /* check too see if the vg has already been loaded off another disk */ LIST_FOREACH(vg, &vg_list, vg_next) { if (strcmp(vg->vg_name, name) == 0) { uuid[0] = '\0'; /* grab the volume group uuid */ while ((tok = strsep(&buf, "\n")) != NULL) { if (strstr(tok, "{")) break; if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_STR("id", v, tok, uuid, sizeof(uuid)); } } if (strcmp(vg->vg_uuid, uuid) == 0) { /* existing vg */ md->md_vg = vg; return (0); } /* XXX different volume group with name clash! 
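			 * A volume group with this name is already loaded but
			 * its uuid differs, so refuse to load this copy rather
			 * than attaching the disk to an unrelated group.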
*/ G_LLVM_DEBUG(0, "%s already exists, volume group not loaded", name); return (EINVAL); } } vg = malloc(sizeof(*vg), M_GLLVM, M_NOWAIT|M_ZERO); if (vg == NULL) return (ENOMEM); strncpy(vg->vg_name, name, sizeof(vg->vg_name)); LIST_INIT(&vg->vg_pvs); LIST_INIT(&vg->vg_lvs); #define VOL_FOREACH(func, tok, buf, p) \ while ((tok = strsep(buf, "\n")) != NULL) { \ if (strstr(tok, "{")) { \ func(buf, tok, p); \ continue; \ } \ if (strstr(tok, "}")) \ break; \ } while ((tok = strsep(&buf, "\n")) != NULL) { if (strcmp(tok, "physical_volumes {") == 0) { VOL_FOREACH(llvm_textconf_decode_pv, tok, &buf, vg); continue; } if (strcmp(tok, "logical_volumes {") == 0) { VOL_FOREACH(llvm_textconf_decode_lv, tok, &buf, vg); continue; } if (strstr(tok, "{")) { G_LLVM_DEBUG(2, "unknown section %s", tok); continue; } /* parse 'key = value' lines */ if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_STR("id", v, tok, vg->vg_uuid, sizeof(vg->vg_uuid)); GRAB_INT("extent_size", v, tok, vg->vg_extentsize); continue; } } /* basic checking */ if (vg->vg_extentsize == 0) goto bad; md->md_vg = vg; LIST_INSERT_HEAD(&vg_list, vg, vg_next); G_LLVM_DEBUG(3, "vg: name=%s uuid=%s", vg->vg_name, vg->vg_uuid); return(0); bad: g_llvm_free_vg(vg); return (-1); } #undef VOL_FOREACH static int llvm_textconf_decode_pv(char **buf, char *tok, struct g_llvm_vg *vg) { struct g_llvm_pv *pv; char *v; size_t len; if (*buf == NULL || **buf == '\0') return (EINVAL); pv = malloc(sizeof(*pv), M_GLLVM, M_NOWAIT|M_ZERO); if (pv == NULL) return (ENOMEM); pv->pv_vg = vg; len = 0; if (tok == NULL) goto bad; len = llvm_grab_name(pv->pv_name, tok); if (len == 0) goto bad; while ((tok = strsep(buf, "\n")) != NULL) { if (strstr(tok, "{")) goto bad; if (strstr(tok, "}")) break; /* parse 'key = value' lines */ if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_STR("id", v, tok, pv->pv_uuid, sizeof(pv->pv_uuid)); GRAB_INT("pe_start", v, tok, pv->pv_start); GRAB_INT("pe_count", v, tok, pv->pv_count); continue; } } if (tok == NULL) goto bad; /* basic checking */ if (pv->pv_count == 0) goto bad; LIST_INSERT_HEAD(&vg->vg_pvs, pv, pv_next); G_LLVM_DEBUG(3, "pv: name=%s uuid=%s", pv->pv_name, pv->pv_uuid); return (0); bad: free(pv, M_GLLVM); return (-1); } static int llvm_textconf_decode_lv(char **buf, char *tok, struct g_llvm_vg *vg) { struct g_llvm_lv *lv; struct g_llvm_segment *sg; char *v; size_t len; if (*buf == NULL || **buf == '\0') return (EINVAL); lv = malloc(sizeof(*lv), M_GLLVM, M_NOWAIT|M_ZERO); if (lv == NULL) return (ENOMEM); lv->lv_vg = vg; LIST_INIT(&lv->lv_segs); if (tok == NULL) goto bad; len = llvm_grab_name(lv->lv_name, tok); if (len == 0) goto bad; while ((tok = strsep(buf, "\n")) != NULL) { if (strstr(tok, "{")) { if (strstr(tok, "segment")) { llvm_textconf_decode_sg(buf, tok, lv); continue; } else /* unexpected section */ goto bad; } if (strstr(tok, "}")) break; /* parse 'key = value' lines */ if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_STR("id", v, tok, lv->lv_uuid, sizeof(lv->lv_uuid)); GRAB_INT("segment_count", v, tok, lv->lv_sgcount); continue; } } if (tok == NULL) goto bad; if (lv->lv_sgcount == 0 || lv->lv_sgcount != lv->lv_numsegs) /* zero or incomplete segment list */ goto bad; /* Optimize for only one segment on the pv */ lv->lv_firstsg = LIST_FIRST(&lv->lv_segs); LIST_INSERT_HEAD(&vg->vg_lvs, lv, lv_next); G_LLVM_DEBUG(3, "lv: name=%s uuid=%s", lv->lv_name, lv->lv_uuid); return (0); bad: while ((sg = LIST_FIRST(&lv->lv_segs)) != NULL) { LIST_REMOVE(sg, sg_next); free(sg, M_GLLVM); } free(lv, M_GLLVM); return (-1); } static int 
llvm_textconf_decode_sg(char **buf, char *tok, struct g_llvm_lv *lv) { struct g_llvm_segment *sg; char *v; int count = 0; if (*buf == NULL || **buf == '\0') return (EINVAL); sg = malloc(sizeof(*sg), M_GLLVM, M_NOWAIT|M_ZERO); if (sg == NULL) return (ENOMEM); while ((tok = strsep(buf, "\n")) != NULL) { /* only a single linear stripe is supported */ if (strstr(tok, "stripe_count")) { SPLIT(v, tok, "="); GRAB_INT("stripe_count", v, tok, count); if (count != 1) goto bad; } if (strstr(tok, "{")) goto bad; if (strstr(tok, "}")) break; if (strcmp(tok, "stripes = [") == 0) { tok = strsep(buf, "\n"); if (tok == NULL) goto bad; strsep(&tok, "\""); if (tok == NULL) goto bad; /* missing open quotes */ v = strsep(&tok, "\""); if (tok == NULL) goto bad; /* missing close quotes */ strncpy(sg->sg_pvname, v, sizeof(sg->sg_pvname)); if (*tok != ',') goto bad; /* missing comma for stripe */ tok++; sg->sg_pvstart = strtol(tok, &v, 10); if (v == tok) /* strtol did not eat any of the buffer */ goto bad; continue; } /* parse 'key = value' lines */ if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_INT("start_extent", v, tok, sg->sg_start); GRAB_INT("extent_count", v, tok, sg->sg_count); continue; } } if (tok == NULL) goto bad; /* basic checking */ if (count != 1 || sg->sg_count == 0) goto bad; sg->sg_end = sg->sg_start + sg->sg_count - 1; lv->lv_numsegs++; lv->lv_extentcount += sg->sg_count; LIST_INSERT_HEAD(&lv->lv_segs, sg, sg_next); return (0); bad: free(sg, M_GLLVM); return (-1); } #undef GRAB_INT #undef GRAB_STR #undef SPLIT static struct g_class g_llvm_class = { .name = G_LLVM_CLASS_NAME, .version = G_VERSION, .init = g_llvm_init, .taste = g_llvm_taste, .destroy_geom = g_llvm_destroy_geom }; DECLARE_GEOM_CLASS(g_llvm_class, g_linux_lvm); MODULE_VERSION(geom_linux_lvm, 0); Index: head/sys/geom/linux_lvm/g_linux_lvm.h =================================================================== --- head/sys/geom/linux_lvm/g_linux_lvm.h (revision 350693) +++ head/sys/geom/linux_lvm/g_linux_lvm.h (revision 350694) @@ -1,115 +1,107 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008 Andrew Thompson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ -#define G_LLVM_DEBUG(lvl, ...) 
do { \ - if (g_llvm_debug >= (lvl)) { \ - printf("GEOM_LINUX_LVM"); \ - if (g_llvm_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) +#define G_LLVM_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_LINUX_LVM", g_llvm_debug, (lvl), NULL, __VA_ARGS__) #define G_LLVM_CLASS_NAME "LINUX_LVM" #define G_LLVM_NAMELEN 128 #define G_LLVM_UUIDLEN 40 #define G_LLVM_MAGIC "\040\114\126\115\062\040\170\133" \ "\065\101\045\162\060\116\052\076" struct g_llvm_label { uint64_t ll_sector; uint32_t ll_crc; uint32_t ll_offset; char ll_uuid[G_LLVM_UUIDLEN]; uint64_t ll_size; uint64_t ll_pestart; uint64_t ll_md_offset; uint64_t ll_md_size; }; struct g_llvm_metadata { uint32_t md_csum; uint32_t md_version; uint64_t md_start; uint64_t md_size; uint64_t md_reloffset; uint64_t md_relsize; struct g_llvm_vg *md_vg; }; struct g_llvm_lv { LIST_ENTRY(g_llvm_lv) lv_next; struct g_llvm_vg *lv_vg; char lv_name[G_LLVM_NAMELEN]; char lv_uuid[G_LLVM_UUIDLEN]; int lv_sgcount; int lv_sgactive; struct g_provider *lv_gprov; int lv_extentcount; LIST_HEAD(, g_llvm_segment) lv_segs; int lv_numsegs; struct g_llvm_segment *lv_firstsg; }; struct g_llvm_pv { LIST_ENTRY(g_llvm_pv) pv_next; struct g_llvm_vg *pv_vg; char pv_name[G_LLVM_NAMELEN]; char pv_uuid[G_LLVM_UUIDLEN]; size_t pv_size; off_t pv_start; int pv_count; struct g_provider *pv_gprov; struct g_consumer *pv_gcons; }; struct g_llvm_segment { LIST_ENTRY(g_llvm_segment) sg_next; int sg_start; int sg_end; int sg_count; char sg_pvname[G_LLVM_NAMELEN]; struct g_llvm_pv *sg_pv; int sg_pvstart; off_t sg_pvoffset; }; struct g_llvm_vg { LIST_ENTRY(g_llvm_vg) vg_next; char vg_name[G_LLVM_NAMELEN]; char vg_uuid[G_LLVM_UUIDLEN]; size_t vg_extentsize; int vg_sectorsize; struct g_geom *vg_geom; LIST_HEAD(, g_llvm_pv) vg_pvs; LIST_HEAD(, g_llvm_lv) vg_lvs; }; Index: head/sys/geom/mirror/g_mirror.c =================================================================== --- head/sys/geom/mirror/g_mirror.c (revision 350693) +++ head/sys/geom/mirror/g_mirror.c (revision 350694) @@ -1,3572 +1,3573 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(geom_mirror, "GEOM mirroring support"); static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff"); int g_mirror_debug = 0; SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0, "Debug level"); bool g_launch_mirror_before_timeout = true; SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout, CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0, "If false, force gmirror to wait out the full kern.geom.mirror.timeout " "before launching mirrors"); static u_int g_mirror_timeout = 4; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout, 0, "Time to wait on all mirror components"); static u_int g_mirror_idletime = 5; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN, &g_mirror_idletime, 0, "Mark components as clean when idling"); static u_int g_mirror_disconnect_on_failure = 1; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure."); static u_int g_mirror_syncreqs = 2; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN, &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests."); static u_int g_mirror_sync_period = 5; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN, &g_mirror_sync_period, 0, "Metadata update period during synchronization, in seconds"); #define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ } while (0) static eventhandler_tag g_mirror_post_sync = NULL; static int g_mirror_shutdown = 0; static g_ctl_destroy_geom_t g_mirror_destroy_geom; static g_taste_t g_mirror_taste; static g_init_t g_mirror_init; static g_fini_t g_mirror_fini; static g_provgone_t g_mirror_providergone; static g_resize_t g_mirror_resize; struct g_class g_mirror_class = { .name = G_MIRROR_CLASS_NAME, .version = G_VERSION, .ctlreq = g_mirror_config, .taste = g_mirror_taste, .destroy_geom = g_mirror_destroy_geom, .init = g_mirror_init, .fini = g_mirror_fini, .providergone = g_mirror_providergone, .resize = g_mirror_resize }; static void g_mirror_destroy_provider(struct g_mirror_softc *sc); static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state); static void g_mirror_update_device(struct g_mirror_softc *sc, bool force); static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static int g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp, const struct g_mirror_metadata *md); static void g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp, off_t offset); static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type); static void g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp); static void g_mirror_sync_release(struct g_mirror_softc *sc); static const char * g_mirror_disk_state2str(int state) { switch (state) { case G_MIRROR_DISK_STATE_NONE: return ("NONE"); case G_MIRROR_DISK_STATE_NEW: return ("NEW"); case 
G_MIRROR_DISK_STATE_ACTIVE: return ("ACTIVE"); case G_MIRROR_DISK_STATE_STALE: return ("STALE"); case G_MIRROR_DISK_STATE_SYNCHRONIZING: return ("SYNCHRONIZING"); case G_MIRROR_DISK_STATE_DISCONNECTED: return ("DISCONNECTED"); case G_MIRROR_DISK_STATE_DESTROY: return ("DESTROY"); default: return ("INVALID"); } } static const char * g_mirror_device_state2str(int state) { switch (state) { case G_MIRROR_DEVICE_STATE_STARTING: return ("STARTING"); case G_MIRROR_DEVICE_STATE_RUNNING: return ("RUNNING"); default: return ("INVALID"); } } static const char * g_mirror_get_diskname(struct g_mirror_disk *disk) { if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) return ("[unknown]"); return (disk->d_name); } /* * --- Events handling functions --- * Events in geom_mirror are used to maintain disks and device status * from one thread to simplify locking. */ static void g_mirror_event_free(struct g_mirror_event *ep) { free(ep, M_MIRROR); } int g_mirror_event_send(void *arg, int state, int flags) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; struct g_mirror_event *ep; int error; ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK); G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep); if ((flags & G_MIRROR_EVENT_DEVICE) != 0) { disk = NULL; sc = arg; } else { disk = arg; sc = disk->d_softc; } ep->e_disk = disk; ep->e_state = state; ep->e_flags = flags; ep->e_error = 0; mtx_lock(&sc->sc_events_mtx); TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_events_mtx); G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); mtx_lock(&sc->sc_queue_mtx); wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0) return (0); G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep); sx_xunlock(&sc->sc_lock); while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) { mtx_lock(&sc->sc_events_mtx); MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event", hz * 5); } error = ep->e_error; g_mirror_event_free(ep); sx_xlock(&sc->sc_lock); return (error); } static struct g_mirror_event * g_mirror_event_first(struct g_mirror_softc *sc) { struct g_mirror_event *ep; mtx_lock(&sc->sc_events_mtx); ep = TAILQ_FIRST(&sc->sc_events); mtx_unlock(&sc->sc_events_mtx); return (ep); } static void g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep) { mtx_lock(&sc->sc_events_mtx); TAILQ_REMOVE(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_events_mtx); } static void g_mirror_event_cancel(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; struct g_mirror_event *ep, *tmpep; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); mtx_lock(&sc->sc_events_mtx); TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) continue; if (ep->e_disk != disk) continue; TAILQ_REMOVE(&sc->sc_events, ep, e_next); if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) g_mirror_event_free(ep); else { ep->e_error = ECANCELED; wakeup(ep); } } mtx_unlock(&sc->sc_events_mtx); } /* * Return the number of disks in given state. * If state is equal to -1, count all connected disks. */ u_int g_mirror_ndisks(struct g_mirror_softc *sc, int state) { struct g_mirror_disk *disk; u_int n = 0; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (state == -1 || disk->d_state == state) n++; } return (n); } /* * Find a disk in mirror by its disk ID. 
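 * The caller must hold sc_lock; NULL is returned if no connected disk has
 * the given ID.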
*/ static struct g_mirror_disk * g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id) { struct g_mirror_disk *disk; sx_assert(&sc->sc_lock, SX_XLOCKED); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_id == id) return (disk); } return (NULL); } static u_int g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp) { struct bio *bp; u_int nreqs = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) { if (bp->bio_from == cp) nreqs++; } mtx_unlock(&sc->sc_queue_mtx); return (nreqs); } static int g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp) { if (cp->index > 0) { G_MIRROR_DEBUG(2, "I/O requests for %s exist, can't destroy it now.", cp->provider->name); return (1); } if (g_mirror_nrequests(sc, cp) > 0) { G_MIRROR_DEBUG(2, "I/O requests for %s in queue, can't destroy it now.", cp->provider->name); return (1); } return (0); } static void g_mirror_destroy_consumer(void *arg, int flags __unused) { struct g_consumer *cp; g_topology_assert(); cp = arg; G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); g_detach(cp); g_destroy_consumer(cp); } static void g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) { struct g_provider *pp; int retaste_wait; g_topology_assert(); cp->private = NULL; if (g_mirror_is_busy(sc, cp)) return; pp = cp->provider; retaste_wait = 0; if (cp->acw == 1) { if ((pp->geom->flags & G_GEOM_WITHER) == 0) retaste_wait = 1; } G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, -cp->acw, -cp->ace, 0); if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); if (retaste_wait) { /* * After retaste event was send (inside g_access()), we can send * event to detach and destroy consumer. * A class, which has consumer to the given provider connected * will not receive retaste event for the provider. * This is the way how I ignore retaste events when I close * consumers opened for write: I detach and destroy consumer * after retaste event is sent. */ g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL); return; } G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name); g_detach(cp); g_destroy_consumer(cp); } static int g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp) { struct g_consumer *cp; int error; g_topology_assert_not(); KASSERT(disk->d_consumer == NULL, ("Disk already connected (device %s).", disk->d_softc->sc_name)); g_topology_lock(); cp = g_new_consumer(disk->d_softc->sc_geom); cp->flags |= G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); g_topology_unlock(); return (error); } error = g_access(cp, 1, 1, 1); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).", pp->name, error); return (error); } g_topology_unlock(); disk->d_consumer = cp; disk->d_consumer->private = disk; disk->d_consumer->index = 0; G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk)); return (0); } static void g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) { g_topology_assert(); if (cp == NULL) return; if (cp->provider != NULL) g_mirror_kill_consumer(sc, cp); else g_destroy_consumer(cp); } /* * Initialize disk. This means allocate memory, create consumer, attach it * to the provider and open access (r1w1e1) to it. 
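 * On failure NULL is returned and, when errorp is not NULL, the error code
 * is passed back through it.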
*/ static struct g_mirror_disk * g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp, struct g_mirror_metadata *md, int *errorp) { struct g_mirror_disk *disk; int i, error; disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO); if (disk == NULL) { error = ENOMEM; goto fail; } disk->d_softc = sc; error = g_mirror_connect_disk(disk, pp); if (error != 0) goto fail; disk->d_id = md->md_did; disk->d_state = G_MIRROR_DISK_STATE_NONE; disk->d_priority = md->md_priority; disk->d_flags = md->md_dflags; error = g_getattr("GEOM::candelete", disk->d_consumer, &i); if (error == 0 && i != 0) disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE; if (md->md_provider[0] != '\0') disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; disk->d_sync.ds_consumer = NULL; disk->d_sync.ds_offset = md->md_sync_offset; disk->d_sync.ds_offset_done = md->md_sync_offset; disk->d_sync.ds_update_ts = time_uptime; disk->d_genid = md->md_genid; disk->d_sync.ds_syncid = md->md_syncid; disk->d_init_ndisks = md->md_all; disk->d_init_slice = md->md_slice; disk->d_init_balance = md->md_balance; disk->d_init_mediasize = md->md_mediasize; if (errorp != NULL) *errorp = 0; return (disk); fail: if (errorp != NULL) *errorp = error; if (disk != NULL) free(disk, M_MIRROR); return (NULL); } static void g_mirror_destroy_disk(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); g_topology_lock(); LIST_REMOVE(disk, d_next); g_topology_unlock(); g_mirror_event_cancel(disk); if (sc->sc_hint == disk) sc->sc_hint = NULL; switch (disk->d_state) { case G_MIRROR_DISK_STATE_SYNCHRONIZING: g_mirror_sync_stop(disk, 1); /* FALLTHROUGH */ case G_MIRROR_DISK_STATE_NEW: case G_MIRROR_DISK_STATE_STALE: case G_MIRROR_DISK_STATE_ACTIVE: g_topology_lock(); g_mirror_disconnect_consumer(sc, disk->d_consumer); g_topology_unlock(); free(disk, M_MIRROR); break; default: KASSERT(0 == 1, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); } } static void g_mirror_free_device(struct g_mirror_softc *sc) { g_topology_assert(); mtx_destroy(&sc->sc_queue_mtx); mtx_destroy(&sc->sc_events_mtx); mtx_destroy(&sc->sc_done_mtx); sx_destroy(&sc->sc_lock); free(sc, M_MIRROR); } static void g_mirror_providergone(struct g_provider *pp) { struct g_mirror_softc *sc = pp->private; if ((--sc->sc_refcnt) == 0) g_mirror_free_device(sc); } static void g_mirror_destroy_device(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; struct g_mirror_event *ep; struct g_geom *gp; struct g_consumer *cp, *tmpcp; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); gp = sc->sc_geom; if (sc->sc_provider != NULL) g_mirror_destroy_provider(sc); for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL; disk = LIST_FIRST(&sc->sc_disks)) { disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; g_mirror_update_metadata(disk); g_mirror_destroy_disk(disk); } while ((ep = g_mirror_event_first(sc)) != NULL) { g_mirror_event_remove(sc, ep); if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) g_mirror_event_free(ep); else { ep->e_error = ECANCELED; ep->e_flags |= G_MIRROR_EVENT_DONE; G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep); mtx_lock(&sc->sc_events_mtx); wakeup(ep); mtx_unlock(&sc->sc_events_mtx); } } callout_drain(&sc->sc_callout); g_topology_lock(); LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) { g_mirror_disconnect_consumer(sc, cp); } g_wither_geom(sc->sc_sync.ds_geom, ENXIO); G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name); 
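	/*
	 * Wither the main geom and drop a reference on the softc.  If the
	 * provider still holds the last reference, the softc is freed later
	 * from g_mirror_providergone() rather than here.
	 */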
g_wither_geom(gp, ENXIO); sx_xunlock(&sc->sc_lock); if ((--sc->sc_refcnt) == 0) g_mirror_free_device(sc); g_topology_unlock(); } static void g_mirror_orphan(struct g_consumer *cp) { struct g_mirror_disk *disk; g_topology_assert(); disk = cp->private; if (disk == NULL) return; disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); } /* * Function should return the next active disk on the list. * It is possible that it will be the same disk as given. * If there are no active disks on list, NULL is returned. */ static __inline struct g_mirror_disk * g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk) { struct g_mirror_disk *dp; for (dp = LIST_NEXT(disk, d_next); dp != disk; dp = LIST_NEXT(dp, d_next)) { if (dp == NULL) dp = LIST_FIRST(&sc->sc_disks); if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) break; } if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) return (NULL); return (dp); } static struct g_mirror_disk * g_mirror_get_disk(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; if (sc->sc_hint == NULL) { sc->sc_hint = LIST_FIRST(&sc->sc_disks); if (sc->sc_hint == NULL) return (NULL); } disk = sc->sc_hint; if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) { disk = g_mirror_find_next(sc, disk); if (disk == NULL) return (NULL); } sc->sc_hint = g_mirror_find_next(sc, disk); return (disk); } static int g_mirror_write_metadata(struct g_mirror_disk *disk, struct g_mirror_metadata *md) { struct g_mirror_softc *sc; struct g_consumer *cp; off_t offset, length; u_char *sector; int error = 0; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); cp = disk->d_consumer; KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); length = cp->provider->sectorsize; offset = cp->provider->mediasize - length; sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO); if (md != NULL && (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) { /* * Handle the case, when the size of parent provider reduced. 
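		 * The metadata sector sits at (mediasize - sectorsize) on the
		 * consumer; if the parent shrank so that this offset now falls
		 * below md_mediasize, there is no room for the metadata and
		 * ENOSPC is returned instead of encoding the sector.
		 * Illustrative numbers only: with 512-byte sectors and a 10 GB
		 * consumer the offset is 10737418240 - 512 = 10737417728, so
		 * any md_mediasize larger than that cannot be accommodated.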
*/ if (offset < md->md_mediasize) error = ENOSPC; else mirror_metadata_encode(md, sector); } KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error); if (error == 0) error = g_write_data(cp, offset, sector, length); free(sector, M_MIRROR); if (error != 0) { if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; G_MIRROR_DEBUG(0, "Cannot write metadata on %s " "(device=%s, error=%d).", g_mirror_get_diskname(disk), sc->sc_name, error); } else { G_MIRROR_DEBUG(1, "Cannot write metadata on %s " "(device=%s, error=%d).", g_mirror_get_diskname(disk), sc->sc_name, error); } if (g_mirror_disconnect_on_failure && g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) { sc->sc_bump_id |= G_MIRROR_BUMP_GENID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); } } return (error); } static int g_mirror_clear_metadata(struct g_mirror_disk *disk) { int error; g_topology_assert_not(); sx_assert(&disk->d_softc->sc_lock, SX_LOCKED); if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC) return (0); error = g_mirror_write_metadata(disk, NULL); if (error == 0) { G_MIRROR_DEBUG(2, "Metadata on %s cleared.", g_mirror_get_diskname(disk)); } else { G_MIRROR_DEBUG(0, "Cannot clear metadata on disk %s (error=%d).", g_mirror_get_diskname(disk), error); } return (error); } void g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk, struct g_mirror_metadata *md) { strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic)); md->md_version = G_MIRROR_VERSION; strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); md->md_mid = sc->sc_id; md->md_all = sc->sc_ndisks; md->md_slice = sc->sc_slice; md->md_balance = sc->sc_balance; md->md_genid = sc->sc_genid; md->md_mediasize = sc->sc_mediasize; md->md_sectorsize = sc->sc_sectorsize; md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK); bzero(md->md_provider, sizeof(md->md_provider)); if (disk == NULL) { md->md_did = arc4random(); md->md_priority = 0; md->md_syncid = 0; md->md_dflags = 0; md->md_sync_offset = 0; md->md_provsize = 0; } else { md->md_did = disk->d_id; md->md_priority = disk->d_priority; md->md_syncid = disk->d_sync.ds_syncid; md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK); if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) md->md_sync_offset = disk->d_sync.ds_offset_done; else md->md_sync_offset = 0; if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) { strlcpy(md->md_provider, disk->d_consumer->provider->name, sizeof(md->md_provider)); } md->md_provsize = disk->d_consumer->provider->mediasize; } } void g_mirror_update_metadata(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; struct g_mirror_metadata md; int error; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC) return; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) g_mirror_fill_metadata(sc, disk, &md); error = g_mirror_write_metadata(disk, &md); if (error == 0) { G_MIRROR_DEBUG(2, "Metadata on %s updated.", g_mirror_get_diskname(disk)); } else { G_MIRROR_DEBUG(0, "Cannot update metadata on disk %s (error=%d).", g_mirror_get_diskname(disk), error); } } static void g_mirror_bump_syncid(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, ("%s called with no active disks (device=%s).", __func__, sc->sc_name)); sc->sc_syncid++; G_MIRROR_DEBUG(1, "Device %s: syncid 
bumped to %u.", sc->sc_name, sc->sc_syncid); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { disk->d_sync.ds_syncid = sc->sc_syncid; g_mirror_update_metadata(disk); } } } static void g_mirror_bump_genid(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, ("%s called with no active disks (device=%s).", __func__, sc->sc_name)); sc->sc_genid++; G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, sc->sc_genid); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { disk->d_genid = sc->sc_genid; g_mirror_update_metadata(disk); } } } static int g_mirror_idle(struct g_mirror_softc *sc, int acw) { struct g_mirror_disk *disk; int timeout; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if (sc->sc_provider == NULL) return (0); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) return (0); if (sc->sc_idle) return (0); if (sc->sc_writes > 0) return (0); if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) { timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write); if (!g_mirror_shutdown && timeout > 0) return (timeout); } sc->sc_idle = 1; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.", g_mirror_get_diskname(disk), sc->sc_name); disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; g_mirror_update_metadata(disk); } return (0); } static void g_mirror_unidle(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) return; sc->sc_idle = 0; sc->sc_last_write = time_uptime; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.", g_mirror_get_diskname(disk), sc->sc_name); disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; g_mirror_update_metadata(disk); } } static void g_mirror_done(struct bio *bp) { struct g_mirror_softc *sc; sc = bp->bio_from->geom->softc; bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR; mtx_lock(&sc->sc_queue_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } static void g_mirror_regular_request_error(struct g_mirror_softc *sc, struct g_mirror_disk *disk, struct bio *bp) { if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == EOPNOTSUPP) return; if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).", bp->bio_error); } else { G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).", bp->bio_error); } if (g_mirror_disconnect_on_failure && g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) { if (bp->bio_error == ENXIO && bp->bio_cmd == BIO_READ) sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; else if (bp->bio_error == ENXIO) sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW; else sc->sc_bump_id |= G_MIRROR_BUMP_GENID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); } } static void g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct bio *pbp; g_topology_assert_not(); KASSERT(sc->sc_provider == 
bp->bio_parent->bio_to, ("regular request %p with unexpected origin", bp)); pbp = bp->bio_parent; bp->bio_from->index--; if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) sc->sc_writes--; disk = bp->bio_from->private; if (disk == NULL) { g_topology_lock(); g_mirror_kill_consumer(sc, bp->bio_from); g_topology_unlock(); } switch (bp->bio_cmd) { case BIO_READ: KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read, bp->bio_error); break; case BIO_WRITE: KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write, bp->bio_error); break; case BIO_DELETE: KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete, bp->bio_error); break; case BIO_FLUSH: KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush, bp->bio_error); break; } pbp->bio_inbed++; KASSERT(pbp->bio_inbed <= pbp->bio_children, ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, pbp->bio_children)); if (bp->bio_error == 0 && pbp->bio_error == 0) { G_MIRROR_LOGREQ(3, bp, "Request delivered."); g_destroy_bio(bp); if (pbp->bio_children == pbp->bio_inbed) { G_MIRROR_LOGREQ(3, pbp, "Request delivered."); pbp->bio_completed = pbp->bio_length; if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) { TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue); /* Release delayed sync requests if possible. */ g_mirror_sync_release(sc); } g_io_deliver(pbp, pbp->bio_error); } return; } else if (bp->bio_error != 0) { if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; if (disk != NULL) g_mirror_regular_request_error(sc, disk, bp); switch (pbp->bio_cmd) { case BIO_DELETE: case BIO_WRITE: case BIO_FLUSH: pbp->bio_inbed--; pbp->bio_children--; break; } } g_destroy_bio(bp); switch (pbp->bio_cmd) { case BIO_READ: if (pbp->bio_inbed < pbp->bio_children) break; if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1) g_io_deliver(pbp, pbp->bio_error); else { pbp->bio_error = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); wakeup(sc); } break; case BIO_DELETE: case BIO_WRITE: case BIO_FLUSH: if (pbp->bio_children == 0) { /* * All requests failed. */ } else if (pbp->bio_inbed < pbp->bio_children) { /* Do nothing. */ break; } else if (pbp->bio_children == pbp->bio_inbed) { /* Some requests succeeded. */ pbp->bio_error = 0; pbp->bio_completed = pbp->bio_length; } if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) { TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue); /* Release delayed sync requests if possible. 
*/ g_mirror_sync_release(sc); } g_io_deliver(pbp, pbp->bio_error); break; default: KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd)); break; } } static void g_mirror_sync_done(struct bio *bp) { struct g_mirror_softc *sc; G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered."); sc = bp->bio_from->geom->softc; bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC; mtx_lock(&sc->sc_queue_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } static void g_mirror_candelete(struct bio *bp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; int val; sc = bp->bio_to->private; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) break; } val = disk != NULL; g_handleattr(bp, "GEOM::candelete", &val, sizeof(val)); } static void g_mirror_kernel_dump(struct bio *bp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; struct bio *cbp; struct g_kerneldump *gkd; /* * We configure dumping to the first component, because this component * will be used for reading with 'prefer' balance algorithm. * If the component with the highest priority is currently disconnected * we will not be able to read the dump after the reboot if it will be * connected and synchronized later. Can we do something better? */ sc = bp->bio_to->private; disk = LIST_FIRST(&sc->sc_disks); gkd = (struct g_kerneldump *)bp->bio_data; if (gkd->length > bp->bio_to->mediasize) gkd->length = bp->bio_to->mediasize; cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; g_io_request(cbp, disk->d_consumer); G_MIRROR_DEBUG(1, "Kernel dump will go to %s.", g_mirror_get_diskname(disk)); } static void g_mirror_start(struct bio *bp) { struct g_mirror_softc *sc; sc = bp->bio_to->private; /* * If sc == NULL or there are no valid disks, provider's error * should be set and g_mirror_start() should not be called at all. */ KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Provider's error should be set (error=%d)(mirror=%s).", bp->bio_to->error, bp->bio_to->name)); G_MIRROR_LOGREQ(3, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: break; case BIO_GETATTR: if (!strcmp(bp->bio_attribute, "GEOM::candelete")) { g_mirror_candelete(bp); return; } else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) { g_mirror_kernel_dump(bp); return; } /* FALLTHROUGH */ default: g_io_deliver(bp, EOPNOTSUPP); return; } mtx_lock(&sc->sc_queue_mtx); if (bp->bio_to->error != 0) { mtx_unlock(&sc->sc_queue_mtx); g_io_deliver(bp, bp->bio_to->error); return; } TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); wakeup(sc); } /* * Return TRUE if the given request is colliding with a in-progress * synchronization request. 
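 *
 * The check below treats every request as the half-open byte range
 * [bio_offset, bio_offset + bio_length) and reports a collision when
 * rend > sstart && rstart < send.  For example (offsets purely
 * illustrative), a regular write covering [64K, 128K) collides with a
 * synchronization request covering [96K, 224K), but not with one
 * covering [128K, 256K).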
*/ static bool g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct bio *sbp; off_t rstart, rend, sstart, send; u_int i; if (sc->sc_sync.ds_ndisks == 0) return (false); rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING) continue; for (i = 0; i < g_mirror_syncreqs; i++) { sbp = disk->d_sync.ds_bios[i]; if (sbp == NULL) continue; sstart = sbp->bio_offset; send = sbp->bio_offset + sbp->bio_length; if (rend > sstart && rstart < send) return (true); } } return (false); } /* * Return TRUE if the given sync request is colliding with a in-progress regular * request. */ static bool g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp) { off_t rstart, rend, sstart, send; struct bio *bp; if (sc->sc_sync.ds_ndisks == 0) return (false); sstart = sbp->bio_offset; send = sbp->bio_offset + sbp->bio_length; TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) { rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; if (rend > sstart && rstart < send) return (true); } return (false); } /* * Puts regular request onto delayed queue. */ static void g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp) { G_MIRROR_LOGREQ(2, bp, "Delaying request."); TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue); } /* * Puts synchronization request onto delayed queue. */ static void g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp) { G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request."); TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue); } /* * Requeue delayed regular requests. */ static void g_mirror_regular_release(struct g_mirror_softc *sc) { struct bio *bp; if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL) return; if (g_mirror_sync_collision(sc, bp)) return; G_MIRROR_DEBUG(2, "Requeuing regular requests after collision."); mtx_lock(&sc->sc_queue_mtx); TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue); TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue); mtx_unlock(&sc->sc_queue_mtx); } /* * Releases delayed sync requests which don't collide anymore with regular * requests. */ static void g_mirror_sync_release(struct g_mirror_softc *sc) { struct bio *bp, *bp2; TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) { if (g_mirror_regular_collision(sc, bp)) continue; TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue); G_MIRROR_LOGREQ(2, bp, "Releasing delayed synchronization request."); g_io_request(bp, bp->bio_from); } } /* * Free a synchronization request and clear its slot in the array. */ static void g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp) { int idx; if (disk != NULL && disk->d_sync.ds_bios != NULL) { idx = (int)(uintptr_t)bp->bio_caller1; KASSERT(disk->d_sync.ds_bios[idx] == bp, ("unexpected sync BIO at %p:%d", disk, idx)); disk->d_sync.ds_bios[idx] = NULL; } free(bp->bio_data, M_MIRROR); g_destroy_bio(bp); } /* * Handle synchronization requests. * Every synchronization request is a two-step process: first, a read request is * sent to the mirror provider via the sync consumer. If that request completes * successfully, it is converted to a write and sent to the disk being * synchronized. If the write also completes successfully, the synchronization * offset is advanced and a new read request is submitted. 
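 *
 * Roughly, and ignoring errors and collisions, each of the
 * g_mirror_syncreqs BIOs allocated in g_mirror_sync_start() cycles as
 * follows: g_mirror_sync_reinit() prepares a BIO_READ of the mirror
 * provider; when the read completes, g_mirror_sync_done() queues it and
 * the worker calls this function, which turns the same BIO into a
 * BIO_WRITE to the disk being synchronized; when the write completes we
 * end up here again, ds_offset is advanced and the BIO is reinitialized
 * for the next read.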
*/ static void g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct g_mirror_disk_sync *sync; KASSERT((bp->bio_cmd == BIO_READ && bp->bio_from->geom == sc->sc_sync.ds_geom) || (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom), ("Sync BIO %p with unexpected origin", bp)); bp->bio_from->index--; disk = bp->bio_from->private; if (disk == NULL) { sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ g_topology_lock(); g_mirror_kill_consumer(sc, bp->bio_from); g_topology_unlock(); g_mirror_sync_request_free(NULL, bp); sx_xlock(&sc->sc_lock); return; } sync = &disk->d_sync; /* * Synchronization request. */ switch (bp->bio_cmd) { case BIO_READ: { struct g_consumer *cp; KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read, bp->bio_error); if (bp->bio_error != 0) { G_MIRROR_LOGREQ(0, bp, "Synchronization request failed (error=%d).", bp->bio_error); /* * The read error will trigger a syncid bump, so there's * no need to do that here. * * The read error handling for regular requests will * retry the read from all active mirrors before passing * the error back up, so there's no need to retry here. */ g_mirror_sync_request_free(disk, bp); g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); return; } G_MIRROR_LOGREQ(3, bp, "Synchronization request half-finished."); bp->bio_cmd = BIO_WRITE; bp->bio_cflags = 0; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(bp, cp); return; } case BIO_WRITE: { off_t offset; int i; KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write, bp->bio_error); if (bp->bio_error != 0) { G_MIRROR_LOGREQ(0, bp, "Synchronization request failed (error=%d).", bp->bio_error); g_mirror_sync_request_free(disk, bp); sc->sc_bump_id |= G_MIRROR_BUMP_GENID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); return; } G_MIRROR_LOGREQ(3, bp, "Synchronization request finished."); if (sync->ds_offset >= sc->sc_mediasize || sync->ds_consumer == NULL || (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { /* Don't send more synchronization requests. */ sync->ds_inflight--; g_mirror_sync_request_free(disk, bp); if (sync->ds_inflight > 0) return; if (sync->ds_consumer == NULL || (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { return; } /* Disk up-to-date, activate it. */ g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE, G_MIRROR_EVENT_DONTWAIT); return; } /* Send next synchronization request. */ g_mirror_sync_reinit(disk, bp, sync->ds_offset); sync->ds_offset += bp->bio_length; G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); sync->ds_consumer->index++; /* * Delay the request if it is colliding with a regular request. */ if (g_mirror_regular_collision(sc, bp)) g_mirror_sync_delay(sc, bp); else g_io_request(bp, sync->ds_consumer); /* Requeue delayed requests if possible. 
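 *
 * Now that this synchronization BIO covers a new range, a regular write
 * that was delayed because it overlapped the range previously being
 * copied may no longer collide, so give the delayed queue another look.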
*/ g_mirror_regular_release(sc); /* Find the smallest offset */ offset = sc->sc_mediasize; for (i = 0; i < g_mirror_syncreqs; i++) { bp = sync->ds_bios[i]; if (bp != NULL && bp->bio_offset < offset) offset = bp->bio_offset; } if (g_mirror_sync_period > 0 && time_uptime - sync->ds_update_ts > g_mirror_sync_period) { sync->ds_offset_done = offset; g_mirror_update_metadata(disk); sync->ds_update_ts = time_uptime; } return; } default: panic("Invalid I/O request %p", bp); } } static void g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct g_consumer *cp; struct bio *cbp; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) break; } if (disk == NULL) { if (bp->bio_error == 0) bp->bio_error = ENXIO; g_io_deliver(bp, bp->bio_error); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } /* * Fill in the component buf structure. */ cp = disk->d_consumer; cbp->bio_done = g_mirror_done; cbp->bio_to = cp->provider; G_MIRROR_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(cbp, cp); } static void g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct g_consumer *cp; struct bio *cbp; disk = g_mirror_get_disk(sc); if (disk == NULL) { if (bp->bio_error == 0) bp->bio_error = ENXIO; g_io_deliver(bp, bp->bio_error); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } /* * Fill in the component buf structure. */ cp = disk->d_consumer; cbp->bio_done = g_mirror_done; cbp->bio_to = cp->provider; G_MIRROR_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(cbp, cp); } #define TRACK_SIZE (1 * 1024 * 1024) #define LOAD_SCALE 256 #define ABS(x) (((x) >= 0) ? (x) : (-(x))) static void g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk, *dp; struct g_consumer *cp; struct bio *cbp; int prio, best; /* Find a disk with the smallest load. */ disk = NULL; best = INT_MAX; LIST_FOREACH(dp, &sc->sc_disks, d_next) { if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; prio = dp->load; /* If disk head is precisely in position - highly prefer it. */ if (dp->d_last_offset == bp->bio_offset) prio -= 2 * LOAD_SCALE; else /* If disk head is close to position - prefer it. */ if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE) prio -= 1 * LOAD_SCALE; if (prio <= best) { disk = dp; best = prio; } } KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name)); cbp = g_clone_bio(bp); if (cbp == NULL) { if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } /* * Fill in the component buf structure. */ cp = disk->d_consumer; cbp->bio_done = g_mirror_done; cbp->bio_to = cp->provider; G_MIRROR_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; /* Remember last head position */ disk->d_last_offset = bp->bio_offset + bp->bio_length; /* Update loads. 
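 *
 * Each disk's load is, in effect, an exponentially weighted moving
 * average of its number of outstanding requests (d_consumer->index),
 * scaled by LOAD_SCALE to keep some fractional precision in integer
 * arithmetic:
 *
 *	load' = (index * LOAD_SCALE + load * 7) / 8
 *
 * For example (values purely illustrative), a disk with 2 requests in
 * flight and a previous load of 256 moves to (2*256 + 256*7)/8 = 288.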
*/ LIST_FOREACH(dp, &sc->sc_disks, d_next) { dp->load = (dp->d_consumer->index * LOAD_SCALE + dp->load * 7) / 8; } g_io_request(cbp, cp); } static void g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) { struct bio_queue queue; struct g_mirror_disk *disk; struct g_consumer *cp; struct bio *cbp; off_t left, mod, offset, slice; u_char *data; u_int ndisks; if (bp->bio_length <= sc->sc_slice) { g_mirror_request_round_robin(sc, bp); return; } ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE); slice = bp->bio_length / ndisks; mod = slice % sc->sc_provider->sectorsize; if (mod != 0) slice += sc->sc_provider->sectorsize - mod; /* * Allocate all bios before sending any request, so we can * return ENOMEM in nice and clean way. */ left = bp->bio_length; offset = bp->bio_offset; data = bp->bio_data; TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = g_mirror_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; cbp->bio_offset = offset; cbp->bio_data = data; cbp->bio_length = MIN(left, slice); left -= cbp->bio_length; if (left == 0) break; offset += cbp->bio_length; data += cbp->bio_length; } while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); G_MIRROR_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); disk->d_consumer->index++; g_io_request(cbp, disk->d_consumer); } } static void g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp) { struct bio_queue queue; struct bio *cbp; struct g_consumer *cp; struct g_mirror_disk *disk; sx_assert(&sc->sc_lock, SA_XLOCKED); /* * To avoid ordering issues, if a write is deferred because of a * collision with a sync request, all I/O is deferred until that * write is initiated. */ if (bp->bio_from->geom != sc->sc_sync.ds_geom && !TAILQ_EMPTY(&sc->sc_regular_delayed)) { g_mirror_regular_delay(sc, bp); return; } switch (bp->bio_cmd) { case BIO_READ: switch (sc->sc_balance) { case G_MIRROR_BALANCE_LOAD: g_mirror_request_load(sc, bp); break; case G_MIRROR_BALANCE_PREFER: g_mirror_request_prefer(sc, bp); break; case G_MIRROR_BALANCE_ROUND_ROBIN: g_mirror_request_round_robin(sc, bp); break; case G_MIRROR_BALANCE_SPLIT: g_mirror_request_split(sc, bp); break; } return; case BIO_WRITE: case BIO_DELETE: /* * Delay the request if it is colliding with a synchronization * request. */ if (g_mirror_sync_collision(sc, bp)) { g_mirror_regular_delay(sc, bp); return; } if (sc->sc_idle) g_mirror_unidle(sc); else sc->sc_last_write = time_uptime; /* * Bump syncid on first write. */ if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) { sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; g_mirror_bump_syncid(sc); } /* * Allocate all bios before sending any request, so we can * return ENOMEM in nice and clean way. 
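 *
 * Once a clone has been handed to g_io_request() we can no longer fail
 * the parent request cleanly, so every clone is allocated up front and
 * the whole batch is destroyed if any single allocation fails.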
*/ TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { switch (disk->d_state) { case G_MIRROR_DISK_STATE_ACTIVE: break; case G_MIRROR_DISK_STATE_SYNCHRONIZING: if (bp->bio_offset >= disk->d_sync.ds_offset) continue; break; default: continue; } if (bp->bio_cmd == BIO_DELETE && (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = g_mirror_done; cp = disk->d_consumer; cbp->bio_caller1 = cp; cbp->bio_to = cp->provider; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); } if (TAILQ_EMPTY(&queue)) { KASSERT(bp->bio_cmd == BIO_DELETE, ("No consumers for regular request %p", bp)); g_io_deliver(bp, EOPNOTSUPP); return; } while ((cbp = TAILQ_FIRST(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp->index++; sc->sc_writes++; g_io_request(cbp, cp); } /* * Put request onto inflight queue, so we can check if new * synchronization requests don't collide with it. */ TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue); return; case BIO_FLUSH: TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = g_mirror_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; } KASSERT(!TAILQ_EMPTY(&queue), ("No consumers for regular request %p", bp)); while ((cbp = TAILQ_FIRST(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); TAILQ_REMOVE(&queue, cbp, bio_queue); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(cbp, cp); } break; default: KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", bp->bio_cmd, sc->sc_name)); break; } } static int g_mirror_can_destroy(struct g_mirror_softc *sc) { struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); gp = sc->sc_geom; if (gp->softc == NULL) return (1); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0) return (0); LIST_FOREACH(cp, &gp->consumer, consumer) { if (g_mirror_is_busy(sc, cp)) return (0); } gp = sc->sc_sync.ds_geom; LIST_FOREACH(cp, &gp->consumer, consumer) { if (g_mirror_is_busy(sc, cp)) return (0); } G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.", sc->sc_name); return (1); } static int g_mirror_try_destroy(struct g_mirror_softc *sc) { if (sc->sc_rootmount != NULL) { G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } g_topology_lock(); if (!g_mirror_can_destroy(sc)) { g_topology_unlock(); return (0); } sc->sc_geom->softc = NULL; sc->sc_sync.ds_geom->softc = NULL; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) { g_topology_unlock(); G_MIRROR_DEBUG(4, "%s: Waking 
up %p.", __func__, &sc->sc_worker); /* Unlock sc_lock here, as it can be destroyed after wakeup. */ sx_xunlock(&sc->sc_lock); wakeup(&sc->sc_worker); sc->sc_worker = NULL; } else { g_topology_unlock(); g_mirror_destroy_device(sc); } return (1); } /* * Worker thread. */ static void g_mirror_worker(void *arg) { struct g_mirror_softc *sc; struct g_mirror_event *ep; struct bio *bp; int timeout; sc = arg; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { G_MIRROR_DEBUG(5, "%s: Let's see...", __func__); /* * First take a look at events. * This is important to handle events before any I/O requests. */ ep = g_mirror_event_first(sc); if (ep != NULL) { g_mirror_event_remove(sc, ep); if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) { /* Update only device status. */ G_MIRROR_DEBUG(3, "Running event for device %s.", sc->sc_name); ep->e_error = 0; g_mirror_update_device(sc, true); } else { /* Update disk status. */ G_MIRROR_DEBUG(3, "Running event for disk %s.", g_mirror_get_diskname(ep->e_disk)); ep->e_error = g_mirror_update_disk(ep->e_disk, ep->e_state); if (ep->e_error == 0) g_mirror_update_device(sc, false); } if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) { KASSERT(ep->e_error == 0, ("Error cannot be handled.")); g_mirror_event_free(ep); } else { ep->e_flags |= G_MIRROR_EVENT_DONE; G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep); mtx_lock(&sc->sc_events_mtx); wakeup(ep); mtx_unlock(&sc->sc_events_mtx); } if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { if (g_mirror_try_destroy(sc)) { curthread->td_pflags &= ~TDP_GEOM; G_MIRROR_DEBUG(1, "Thread exiting."); kproc_exit(0); } } G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__); continue; } /* * Check if we can mark array as CLEAN and if we can't take * how much seconds should we wait. */ timeout = g_mirror_idle(sc, -1); /* * Handle I/O requests. */ mtx_lock(&sc->sc_queue_mtx); bp = TAILQ_FIRST(&sc->sc_queue); if (bp != NULL) TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); else { if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { mtx_unlock(&sc->sc_queue_mtx); if (g_mirror_try_destroy(sc)) { curthread->td_pflags &= ~TDP_GEOM; G_MIRROR_DEBUG(1, "Thread exiting."); kproc_exit(0); } mtx_lock(&sc->sc_queue_mtx); if (!TAILQ_EMPTY(&sc->sc_queue)) { mtx_unlock(&sc->sc_queue_mtx); continue; } } if (g_mirror_event_first(sc) != NULL) { mtx_unlock(&sc->sc_queue_mtx); continue; } sx_xunlock(&sc->sc_lock); MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", timeout * hz); sx_xlock(&sc->sc_lock); G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__); continue; } mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_from->geom == sc->sc_sync.ds_geom && (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) { /* * Handle completion of the first half (the read) of a * block synchronization operation. */ g_mirror_sync_request(sc, bp); } else if (bp->bio_to != sc->sc_provider) { if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) /* * Handle completion of a regular I/O request. */ g_mirror_regular_request(sc, bp); else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) /* * Handle completion of the second half (the * write) of a block synchronization operation. */ g_mirror_sync_request(sc, bp); else { KASSERT(0, ("Invalid request cflags=0x%hx to=%s.", bp->bio_cflags, bp->bio_to->name)); } } else { /* * Initiate an I/O request. 
*/ g_mirror_register_request(sc, bp); } G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__); } } static void g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk) { sx_assert(&sc->sc_lock, SX_LOCKED); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) return; if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) { G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.", g_mirror_get_diskname(disk), sc->sc_name); disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; } else if (sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.", g_mirror_get_diskname(disk), sc->sc_name); disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; } } static void g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp, off_t offset) { void *data; int idx; data = bp->bio_data; idx = (int)(uintptr_t)bp->bio_caller1; g_reset_bio(bp); bp->bio_cmd = BIO_READ; bp->bio_data = data; bp->bio_done = g_mirror_sync_done; bp->bio_from = disk->d_sync.ds_consumer; bp->bio_to = disk->d_softc->sc_provider; bp->bio_caller1 = (void *)(uintptr_t)idx; bp->bio_offset = offset; bp->bio_length = MIN(MAXPHYS, disk->d_softc->sc_mediasize - bp->bio_offset); } static void g_mirror_sync_start(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; struct g_mirror_disk_sync *sync; struct g_consumer *cp; struct bio *bp; int error, i; g_topology_assert_not(); sc = disk->d_softc; sync = &disk->d_sync; sx_assert(&sc->sc_lock, SX_LOCKED); KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, ("Disk %s is not marked for synchronization.", g_mirror_get_diskname(disk))); KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Device not in RUNNING state (%s, %u).", sc->sc_name, sc->sc_state)); sx_xunlock(&sc->sc_lock); g_topology_lock(); cp = g_new_consumer(sc->sc_sync.ds_geom); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, sc->sc_provider); KASSERT(error == 0, ("Cannot attach to %s (error=%d).", sc->sc_name, error)); error = g_access(cp, 1, 0, 0); KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); g_topology_unlock(); sx_xlock(&sc->sc_lock); G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, g_mirror_get_diskname(disk)); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0) disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; KASSERT(sync->ds_consumer == NULL, ("Sync consumer already exists (device=%s, disk=%s).", sc->sc_name, g_mirror_get_diskname(disk))); sync->ds_consumer = cp; sync->ds_consumer->private = disk; sync->ds_consumer->index = 0; /* * Allocate memory for synchronization bios and initialize them. */ sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs, M_MIRROR, M_WAITOK); for (i = 0; i < g_mirror_syncreqs; i++) { bp = g_alloc_bio(); sync->ds_bios[i] = bp; bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK); bp->bio_caller1 = (void *)(uintptr_t)i; g_mirror_sync_reinit(disk, bp, sync->ds_offset); sync->ds_offset += bp->bio_length; } /* Increase the number of disks in SYNCHRONIZING state. */ sc->sc_sync.ds_ndisks++; /* Set the number of in-flight synchronization requests. */ sync->ds_inflight = g_mirror_syncreqs; /* * Fire off first synchronization requests. */ for (i = 0; i < g_mirror_syncreqs; i++) { bp = sync->ds_bios[i]; G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); sync->ds_consumer->index++; /* * Delay the request if it is colliding with a regular request. 
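 *
 * The collision checks keep a synchronization copy and a regular write
 * to the same range strictly ordered; otherwise a sync write carrying
 * data read before a concurrent regular write could land after it and
 * clobber the fresh data on the disk being synchronized.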
*/ if (g_mirror_regular_collision(sc, bp)) g_mirror_sync_delay(sc, bp); else g_io_request(bp, sync->ds_consumer); } } /* * Stop synchronization process. * type: 0 - synchronization finished * 1 - synchronization stopped */ static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type) { struct g_mirror_softc *sc; struct g_consumer *cp; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); if (disk->d_sync.ds_consumer == NULL) return; if (type == 0) { G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.", sc->sc_name, g_mirror_get_diskname(disk)); } else /* if (type == 1) */ { G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.", sc->sc_name, g_mirror_get_diskname(disk)); } g_mirror_regular_release(sc); free(disk->d_sync.ds_bios, M_MIRROR); disk->d_sync.ds_bios = NULL; cp = disk->d_sync.ds_consumer; disk->d_sync.ds_consumer = NULL; disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; sc->sc_sync.ds_ndisks--; sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ g_topology_lock(); g_mirror_kill_consumer(sc, cp); g_topology_unlock(); sx_xlock(&sc->sc_lock); } static void g_mirror_launch_provider(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; struct g_provider *pp, *dp; sx_assert(&sc->sc_lock, SX_LOCKED); g_topology_lock(); pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name); pp->flags |= G_PF_DIRECT_RECEIVE; pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; pp->stripesize = 0; pp->stripeoffset = 0; /* Splitting of unmapped BIO's could work but isn't implemented now */ if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT) pp->flags |= G_PF_ACCEPT_UNMAPPED; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer && disk->d_consumer->provider) { dp = disk->d_consumer->provider; if (dp->stripesize > pp->stripesize) { pp->stripesize = dp->stripesize; pp->stripeoffset = dp->stripeoffset; } /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_MIRROR_DEBUG(0, "Cancelling unmapped " "because of %s.", dp->name); pp->flags &= ~G_PF_ACCEPT_UNMAPPED; } } } pp->private = sc; sc->sc_refcnt++; sc->sc_provider = pp; g_error_provider(pp, 0); g_topology_unlock(); G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name, g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) g_mirror_sync_start(disk); } } static void g_mirror_destroy_provider(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; struct bio *bp; g_topology_assert_not(); KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", sc->sc_name)); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) g_mirror_sync_stop(disk, 1); } g_topology_lock(); g_error_provider(sc->sc_provider, ENXIO); mtx_lock(&sc->sc_queue_mtx); while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) { TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); /* * Abort any pending I/O that wasn't generated by us. * Synchronization requests and requests destined for individual * mirror components can be destroyed immediately. 
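 *
 * Requests addressed to our provider by upper layers are completed with
 * ENXIO so their owners see the failure; our own clones and
 * synchronization BIOs have no outside owner, so they are simply freed
 * (sync BIOs together with their data buffers).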
*/ if (bp->bio_to == sc->sc_provider && bp->bio_from->geom != sc->sc_sync.ds_geom) { g_io_deliver(bp, ENXIO); } else { if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) free(bp->bio_data, M_MIRROR); g_destroy_bio(bp); } } mtx_unlock(&sc->sc_queue_mtx); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name); g_topology_unlock(); } static void g_mirror_go(void *arg) { struct g_mirror_softc *sc; sc = arg; G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); g_mirror_event_send(sc, 0, G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE); } static u_int g_mirror_determine_state(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; u_int state; sc = disk->d_softc; if (sc->sc_syncid == disk->d_sync.ds_syncid) { if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 && (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 || (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) { /* Disk does not need synchronization. */ state = G_MIRROR_DISK_STATE_ACTIVE; } else { if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { /* * We can start synchronization from * the stored offset. */ state = G_MIRROR_DISK_STATE_SYNCHRONIZING; } else { state = G_MIRROR_DISK_STATE_STALE; } } } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { /* * Reset all synchronization data for this disk, * because if it even was synchronized, it was * synchronized to disks with different syncid. */ disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING; disk->d_sync.ds_offset = 0; disk->d_sync.ds_offset_done = 0; disk->d_sync.ds_syncid = sc->sc_syncid; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { state = G_MIRROR_DISK_STATE_SYNCHRONIZING; } else { state = G_MIRROR_DISK_STATE_STALE; } } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { /* * Not good, NOT GOOD! * It means that mirror was started on stale disks * and more fresh disk just arrive. * If there were writes, mirror is broken, sorry. * I think the best choice here is don't touch * this disk and inform the user loudly. */ G_MIRROR_DEBUG(0, "Device %s was started before the freshest " "disk (%s) arrives!! It will not be connected to the " "running device.", sc->sc_name, g_mirror_get_diskname(disk)); g_mirror_destroy_disk(disk); state = G_MIRROR_DISK_STATE_NONE; /* Return immediately, because disk was destroyed. */ return (state); } G_MIRROR_DEBUG(3, "State for %s disk: %s.", g_mirror_get_diskname(disk), g_mirror_disk_state2str(state)); return (state); } /* * Update device state. */ static void g_mirror_update_device(struct g_mirror_softc *sc, bool force) { struct g_mirror_disk *disk; u_int state; sx_assert(&sc->sc_lock, SX_XLOCKED); switch (sc->sc_state) { case G_MIRROR_DEVICE_STATE_STARTING: { struct g_mirror_disk *pdisk, *tdisk; const char *mismatch; uintmax_t found, newest; u_int dirty, ndisks; /* Pre-flight checks */ LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { /* * Confirm we already detected the newest genid. */ KASSERT(sc->sc_genid >= disk->d_genid, ("%s: found newer genid %u (sc:%p had %u).", __func__, disk->d_genid, sc, sc->sc_genid)); /* Kick out any previously tasted stale components. 
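 *
 * A component whose on-disk genid is lower than the newest genid seen
 * missed at least one generation bump (typically an error-triggered
 * disconnect), so its contents cannot be trusted and it is dropped
 * rather than connected.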
*/ if (disk->d_genid < sc->sc_genid) { G_MIRROR_DEBUG(0, "Stale 'genid' field on %s " "(device %s) (component=%u latest=%u), skipping.", g_mirror_get_diskname(disk), sc->sc_name, disk->d_genid, sc->sc_genid); g_mirror_destroy_disk(disk); sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; continue; } /* * Confirm we already detected the newest syncid. */ KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid, ("%s: found newer syncid %u (sc:%p had %u).", __func__, disk->d_sync.ds_syncid, sc, sc->sc_syncid)); #define DETECT_MISMATCH(field, name) \ if (mismatch == NULL && \ disk->d_init_ ## field != sc->sc_ ## field) { \ mismatch = name; \ found = (intmax_t)disk->d_init_ ## field; \ newest = (intmax_t)sc->sc_ ## field; \ } mismatch = NULL; DETECT_MISMATCH(ndisks, "md_all"); DETECT_MISMATCH(balance, "md_balance"); DETECT_MISMATCH(slice, "md_slice"); DETECT_MISMATCH(mediasize, "md_mediasize"); #undef DETECT_MISMATCH if (mismatch != NULL) { G_MIRROR_DEBUG(0, "Found a mismatching '%s' " "field on %s (device %s) (found=%ju " "newest=%ju).", mismatch, g_mirror_get_diskname(disk), sc->sc_name, found, newest); g_mirror_destroy_disk(disk); sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; continue; } } KASSERT(sc->sc_provider == NULL, ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); /* * Are we ready? If the timeout (force is true) has expired, and * any disks are present, then yes. If we're permitted to launch * before the timeout has expired and the expected number of * current-generation mirror disks have been tasted, then yes. */ ndisks = g_mirror_ndisks(sc, -1); if ((force && ndisks > 0) || (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) { ; } else if (ndisks == 0) { /* * Disks went down in starting phase, so destroy * device. */ callout_drain(&sc->sc_callout); sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; return; } else { return; } /* * Activate all disks with the biggest syncid. */ if (force) { /* * If 'force' is true, we have been called due to * timeout, so don't bother canceling timeout. */ ndisks = 0; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) { ndisks++; } } if (ndisks == 0) { /* No valid disks found, destroy device. */ sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; return; } } else { /* Cancel timeout. */ callout_drain(&sc->sc_callout); } /* * Here we need to look for dirty disks and if all disks * with the biggest syncid are dirty, we have to choose * one with the biggest priority and rebuild the rest. */ /* * Find the number of dirty disks with the biggest syncid. * Find the number of disks with the biggest syncid. * While here, find a disk with the biggest priority. */ dirty = ndisks = 0; pdisk = NULL; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_sync.ds_syncid != sc->sc_syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { continue; } ndisks++; if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { dirty++; if (pdisk == NULL || pdisk->d_priority < disk->d_priority) { pdisk = disk; } } } if (dirty == 0) { /* No dirty disks at all, great. */ } else if (dirty == ndisks) { /* * Force synchronization for all dirty disks except one * with the biggest priority. 
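 *
 * "Force synchronization" here just means clearing ds_syncid: a disk
 * whose syncid lags the device syncid is treated as out of date by
 * g_mirror_determine_state() and will be resynchronized (or marked
 * STALE if autosynchronization is disabled).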
*/ KASSERT(pdisk != NULL, ("pdisk == NULL")); G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a " "master disk for synchronization.", g_mirror_get_diskname(pdisk), sc->sc_name); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_sync.ds_syncid != sc->sc_syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { continue; } KASSERT((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0, ("Disk %s isn't marked as dirty.", g_mirror_get_diskname(disk))); /* Skip the disk with the biggest priority. */ if (disk == pdisk) continue; disk->d_sync.ds_syncid = 0; } } else if (dirty < ndisks) { /* * Force synchronization for all dirty disks. * We have some non-dirty disks. */ LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_sync.ds_syncid != sc->sc_syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { continue; } if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) { continue; } disk->d_sync.ds_syncid = 0; } } /* Reset hint. */ sc->sc_hint = NULL; if (force) { /* Remember to bump syncid on first write. */ sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; } state = G_MIRROR_DEVICE_STATE_RUNNING; G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_device_state2str(state)); sc->sc_state = state; LIST_FOREACH(disk, &sc->sc_disks, d_next) { state = g_mirror_determine_state(disk); g_mirror_event_send(disk, state, G_MIRROR_EVENT_DONTWAIT); if (state == G_MIRROR_DISK_STATE_STALE) sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; } break; } case G_MIRROR_DEVICE_STATE_RUNNING: if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 && g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { /* * No usable disks, so destroy the device. */ sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; break; } else if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0 && g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { /* * We have active disks, launch provider if it doesn't * exist. */ if (sc->sc_provider == NULL) g_mirror_launch_provider(sc); if (sc->sc_rootmount != NULL) { G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } } /* * Genid should be bumped immediately, so do it here. */ if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) { sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID; g_mirror_bump_genid(sc); } if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) { sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW; g_mirror_bump_syncid(sc); } break; default: KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state))); break; } } /* * Update disk state and device state if needed. */ #define DISK_STATE_CHANGED() G_MIRROR_DEBUG(1, \ "Disk %s state changed from %s to %s (device %s).", \ g_mirror_get_diskname(disk), \ g_mirror_disk_state2str(disk->d_state), \ g_mirror_disk_state2str(state), sc->sc_name) static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state) { struct g_mirror_softc *sc; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); again: G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state), g_mirror_disk_state2str(state)); switch (state) { case G_MIRROR_DISK_STATE_NEW: /* * Possible scenarios: * 1. New disk arrive. */ /* Previous state should be NONE. 
*/ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); disk->d_state = state; g_topology_lock(); if (LIST_EMPTY(&sc->sc_disks)) LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next); else { struct g_mirror_disk *dp; LIST_FOREACH(dp, &sc->sc_disks, d_next) { if (disk->d_priority >= dp->d_priority) { LIST_INSERT_BEFORE(dp, disk, d_next); dp = NULL; break; } if (LIST_NEXT(dp, d_next) == NULL) break; } if (dp != NULL) LIST_INSERT_AFTER(dp, disk, d_next); } g_topology_unlock(); G_MIRROR_DEBUG(1, "Device %s: provider %s detected.", sc->sc_name, g_mirror_get_diskname(disk)); if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) break; KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); state = g_mirror_determine_state(disk); if (state != G_MIRROR_DISK_STATE_NONE) goto again; break; case G_MIRROR_DISK_STATE_ACTIVE: /* * Possible scenarios: * 1. New disk does not need synchronization. * 2. Synchronization process finished successfully. */ KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); /* Previous state should be NEW or SYNCHRONIZING. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW || disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING; disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC; g_mirror_sync_stop(disk, 0); } disk->d_state = state; disk->d_sync.ds_offset = 0; disk->d_sync.ds_offset_done = 0; g_mirror_update_idle(sc, disk); g_mirror_update_metadata(disk); G_MIRROR_DEBUG(1, "Device %s: provider %s activated.", sc->sc_name, g_mirror_get_diskname(disk)); break; case G_MIRROR_DISK_STATE_STALE: /* * Possible scenarios: * 1. Stale disk was connected. */ /* Previous state should be NEW. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); /* * STALE state is only possible if device is marked * NOAUTOSYNC. */ KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; disk->d_state = state; g_mirror_update_metadata(disk); G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.", sc->sc_name, g_mirror_get_diskname(disk)); break; case G_MIRROR_DISK_STATE_SYNCHRONIZING: /* * Possible scenarios: * 1. Disk which needs synchronization was connected. */ /* Previous state should be NEW. 
*/ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); if (disk->d_state == G_MIRROR_DISK_STATE_NEW) disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; disk->d_state = state; if (sc->sc_provider != NULL) { g_mirror_sync_start(disk); g_mirror_update_metadata(disk); } break; case G_MIRROR_DISK_STATE_DISCONNECTED: /* * Possible scenarios: * 1. Device wasn't running yet, but disk disappear. * 2. Disk was active and disapppear. * 3. Disk disappear during synchronization process. */ if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) { /* * Previous state should be ACTIVE, STALE or * SYNCHRONIZING. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || disk->d_state == G_MIRROR_DISK_STATE_STALE || disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); } else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) { /* Previous state should be NEW. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); /* * Reset bumping syncid if disk disappeared in STARTING * state. */ if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; #ifdef INVARIANTS } else { KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); #endif } DISK_STATE_CHANGED(); G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.", sc->sc_name, g_mirror_get_diskname(disk)); g_mirror_destroy_disk(disk); break; case G_MIRROR_DISK_STATE_DESTROY: { int error; error = g_mirror_clear_metadata(disk); if (error != 0) { G_MIRROR_DEBUG(0, "Device %s: failed to clear metadata on %s: %d.", sc->sc_name, g_mirror_get_diskname(disk), error); break; } DISK_STATE_CHANGED(); G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, g_mirror_get_diskname(disk)); g_mirror_destroy_disk(disk); sc->sc_ndisks--; LIST_FOREACH(disk, &sc->sc_disks, d_next) { g_mirror_update_metadata(disk); } break; } default: KASSERT(1 == 0, ("Unknown state (%u).", state)); break; } return (0); } #undef DISK_STATE_CHANGED int g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); /* Metadata are stored on last sector. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).", cp->provider->name, error); return (error); } /* Decode metadata. 
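 *
 * mirror_metadata_decode() also verifies the MD5 hash stored with the
 * metadata in the last sector; the magic and version checks below
 * reject foreign or newer-format metadata before that result is acted
 * upon.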
*/ error = mirror_metadata_decode(buf, md); g_free(buf); if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0) return (EINVAL); if (md->md_version > G_MIRROR_VERSION) { G_MIRROR_DEBUG(0, "Kernel module is too old to handle metadata from %s.", cp->provider->name); return (EINVAL); } if (error != 0) { G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", cp->provider->name); return (error); } return (0); } static int g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp, struct g_mirror_metadata *md) { G_MIRROR_DEBUG(2, "%s: md_did 0x%u disk %s device %s md_all 0x%x " "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x " "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx " "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx " "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x " "sc_state 0x%x.", __func__, md->md_did, pp->name, sc->sc_name, md->md_all, sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance, sc->sc_balance, (uintmax_t)sc->sc_mediasize, (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize, (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid, md->md_genid, md->md_priority, sc->sc_state); if (g_mirror_id2disk(sc, md->md_did) != NULL) { G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.", pp->name, md->md_did); return (EEXIST); } if (sc->sc_mediasize > pp->mediasize) { G_MIRROR_DEBUG(1, "Invalid size of disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if (md->md_sectorsize != sc->sc_sectorsize) { G_MIRROR_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_sectorsize", pp->name, sc->sc_name); return (EINVAL); } if ((sc->sc_sectorsize % pp->sectorsize) != 0) { G_MIRROR_DEBUG(1, "Invalid sector size of disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) { G_MIRROR_DEBUG(1, "Invalid device flags on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) { G_MIRROR_DEBUG(1, "Invalid disk flags on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } return (0); } int g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp, struct g_mirror_metadata *md) { struct g_mirror_disk *disk; int error; g_topology_assert_not(); G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name); error = g_mirror_check_metadata(sc, pp, md); if (error != 0) return (error); if (md->md_genid < sc->sc_genid) { G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", pp->name, sc->sc_name); return (EINVAL); } /* * If the component disk we're tasting has newer metadata than the * STARTING gmirror device, refresh the device from the component. 
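 *
 * Components may be tasted in any order, so the device may have been
 * created from a component carrying older metadata; a later component
 * with fresher metadata then becomes the authoritative source for the
 * device-wide fields while the device is still STARTING.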
*/ error = g_mirror_refresh_device(sc, pp, md); if (error != 0) return (error); disk = g_mirror_init_disk(sc, pp, md, &error); if (disk == NULL) return (error); error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW, G_MIRROR_EVENT_WAIT); if (error != 0) return (error); if (md->md_version < G_MIRROR_VERSION) { G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", pp->name, md->md_version, G_MIRROR_VERSION); g_mirror_update_metadata(disk); } return (0); } static void g_mirror_destroy_delayed(void *arg, int flag) { struct g_mirror_softc *sc; int error; if (flag == EV_CANCEL) { G_MIRROR_DEBUG(1, "Destroying canceled."); return; } sc = arg; g_topology_unlock(); sx_xlock(&sc->sc_lock); KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0, ("DESTROY flag set on %s.", sc->sc_name)); KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0, ("CLOSEWAIT flag not set on %s.", sc->sc_name)); G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name); error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT); if (error != 0) { G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).", sc->sc_name, error); sx_xunlock(&sc->sc_lock); } g_topology_lock(); } static int g_mirror_access(struct g_provider *pp, int acr, int acw, int ace) { struct g_mirror_softc *sc; int error = 0; g_topology_assert(); G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, acw, ace); sc = pp->private; KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); g_topology_unlock(); sx_xlock(&sc->sc_lock); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 || (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 || LIST_EMPTY(&sc->sc_disks)) { if (acr > 0 || acw > 0 || ace > 0) error = ENXIO; goto end; } sc->sc_provider_open += acr + acw + ace; if (pp->acw + acw == 0) g_mirror_idle(sc, 0); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 && sc->sc_provider_open == 0) g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL); end: sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } static void g_mirror_reinit_from_metadata(struct g_mirror_softc *sc, const struct g_mirror_metadata *md) { sc->sc_genid = md->md_genid; sc->sc_syncid = md->md_syncid; sc->sc_slice = md->md_slice; sc->sc_balance = md->md_balance; sc->sc_mediasize = md->md_mediasize; sc->sc_ndisks = md->md_all; sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_MASK; sc->sc_flags |= (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK); } struct g_geom * g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, u_int type) { struct g_mirror_softc *sc; struct g_geom *gp; int error, timeout; g_topology_assert(); G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_mid); /* One disk is minimum. */ if (md->md_all < 1) return (NULL); /* * Action geom. 
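 *
 * The "action" geom created here owns the mirror/<name> provider and
 * the consumers used for regular component I/O; a separate <name>.sync
 * geom is created below whose only purpose is to own the consumer used
 * by the synchronization code to read from our own provider, which lets
 * the worker tell sync reads apart by their originating geom.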
*/ gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO); gp->start = g_mirror_start; gp->orphan = g_mirror_orphan; gp->access = g_mirror_access; gp->dumpconf = g_mirror_dumpconf; sc->sc_type = type; sc->sc_id = md->md_mid; g_mirror_reinit_from_metadata(sc, md); sc->sc_sectorsize = md->md_sectorsize; sc->sc_bump_id = 0; sc->sc_idle = 1; sc->sc_last_write = time_uptime; sc->sc_writes = 0; sc->sc_refcnt = 1; sx_init(&sc->sc_lock, "gmirror:lock"); TAILQ_INIT(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF); TAILQ_INIT(&sc->sc_regular_delayed); TAILQ_INIT(&sc->sc_inflight); TAILQ_INIT(&sc->sc_sync_delayed); LIST_INIT(&sc->sc_disks); TAILQ_INIT(&sc->sc_events); mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); callout_init(&sc->sc_callout, 1); mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF); sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; sc->sc_provider_open = 0; /* * Synchronization geom. */ gp = g_new_geomf(mp, "%s.sync", md->md_name); gp->softc = sc; gp->orphan = g_mirror_orphan; sc->sc_sync.ds_geom = gp; sc->sc_sync.ds_ndisks = 0; error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0, "g_mirror %s", md->md_name); if (error != 0) { G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.", sc->sc_name); g_destroy_geom(sc->sc_sync.ds_geom); g_destroy_geom(sc->sc_geom); g_mirror_free_device(sc); return (NULL); } G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).", sc->sc_name, sc->sc_ndisks, sc->sc_id); sc->sc_rootmount = root_mount_hold("GMIRROR"); G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); /* * Run timeout. */ timeout = g_mirror_timeout * hz; callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc); return (sc->sc_geom); } int g_mirror_destroy(struct g_mirror_softc *sc, int how) { struct g_mirror_disk *disk; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if (sc->sc_provider_open != 0) { switch (how) { case G_MIRROR_DESTROY_SOFT: G_MIRROR_DEBUG(1, "Device %s is still open (%d).", sc->sc_name, sc->sc_provider_open); return (EBUSY); case G_MIRROR_DESTROY_DELAYED: G_MIRROR_DEBUG(1, "Device %s will be destroyed on last close.", sc->sc_name); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { g_mirror_sync_stop(disk, 1); } } sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT; return (EBUSY); case G_MIRROR_DESTROY_HARD: G_MIRROR_DEBUG(1, "Device %s is still open, so it " "can't be definitely removed.", sc->sc_name); } } if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { sx_xunlock(&sc->sc_lock); return (0); } sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN; G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); sx_xunlock(&sc->sc_lock); mtx_lock(&sc->sc_queue_mtx); wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); while (sc->sc_worker != NULL) tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5); G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); sx_xlock(&sc->sc_lock); g_mirror_destroy_device(sc); return (0); } static void g_mirror_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_mirror_metadata md; struct g_mirror_softc *sc; struct g_consumer *cp; struct g_geom 
*gp; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_MIRROR_DEBUG(2, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "mirror:taste"); /* * This orphan function should be never called. */ gp->orphan = g_mirror_taste_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_mirror_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) return (NULL); if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) { G_MIRROR_DEBUG(0, "Device %s: provider %s marked as inactive, skipping.", md.md_name, pp->name); return (NULL); } if (g_mirror_debug >= 2) mirror_metadata_dump(&md); /* * Let's check if device already exists. */ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC) continue; if (sc->sc_sync.ds_geom == gp) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_mid != sc->sc_id) { G_MIRROR_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } break; } if (gp == NULL) { gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC); if (gp == NULL) { G_MIRROR_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; } G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); g_topology_unlock(); sx_xlock(&sc->sc_lock); sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING; error = g_mirror_add_disk(sc, pp, &md); sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING; if (error != 0) { G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); if (LIST_EMPTY(&sc->sc_disks)) { g_cancel_event(sc); g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD); g_topology_lock(); return (NULL); } gp = NULL; } if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD); g_topology_lock(); return (NULL); } sx_xunlock(&sc->sc_lock); g_topology_lock(); return (gp); } static void g_mirror_resize(struct g_consumer *cp) { struct g_mirror_disk *disk; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name); disk = cp->private; if (disk == NULL) return; g_topology_unlock(); g_mirror_update_metadata(disk); g_topology_lock(); } static int g_mirror_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_mirror_softc *sc; int error; g_topology_unlock(); sc = gp->softc; sx_xlock(&sc->sc_lock); g_cancel_event(sc); error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT); if (error != 0) sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_mirror_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; /* Skip synchronization geom. */ if (gp == sc->sc_sync.ds_geom) return; if (pp != NULL) { /* Nothing here. 
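 *
 * (No per-provider attributes are emitted here; the per-consumer and
 * per-geom branches below carry all of the state that gmirror reports.)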
*/ } else if (cp != NULL) { struct g_mirror_disk *disk; disk = cp->private; if (disk == NULL) return; sbuf_printf(sb, "%s%u\n", indent, (u_int)disk->d_id); if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { sbuf_printf(sb, "%s", indent); if (disk->d_sync.ds_offset == 0) sbuf_cat(sb, "0%"); else sbuf_printf(sb, "%u%%", (u_int)((disk->d_sync.ds_offset * 100) / sc->sc_mediasize)); sbuf_cat(sb, "\n"); if (disk->d_sync.ds_offset > 0) sbuf_printf(sb, "%s%jd" "\n", indent, (intmax_t)disk->d_sync.ds_offset); } sbuf_printf(sb, "%s%u\n", indent, disk->d_sync.ds_syncid); sbuf_printf(sb, "%s%u\n", indent, disk->d_genid); sbuf_printf(sb, "%s", indent); if (disk->d_flags == 0) sbuf_cat(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if ((disk->d_flags & (flag)) != 0) { \ if (!first) \ sbuf_cat(sb, ", "); \ else \ first = 0; \ sbuf_cat(sb, name); \ } \ } while (0) ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY"); ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED"); ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE"); ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING, "SYNCHRONIZING"); ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN"); #undef ADD_FLAG } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, disk->d_priority); sbuf_printf(sb, "%s%s\n", indent, g_mirror_disk_state2str(disk->d_state)); } else { sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_MIRROR_TYPE_AUTOMATIC: sbuf_cat(sb, "AUTOMATIC"); break; case G_MIRROR_TYPE_MANUAL: sbuf_cat(sb, "MANUAL"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s%u\n", indent, sc->sc_syncid); sbuf_printf(sb, "%s%u\n", indent, sc->sc_genid); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_cat(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if ((sc->sc_flags & (flag)) != 0) { \ if (!first) \ sbuf_cat(sb, ", "); \ else \ first = 0; \ sbuf_cat(sb, name); \ } \ } while (0) ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); #undef ADD_FLAG } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_slice); sbuf_printf(sb, "%s%s\n", indent, balance_name(sc->sc_balance)); sbuf_printf(sb, "%s%u\n", indent, sc->sc_ndisks); sbuf_printf(sb, "%s", indent); if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) sbuf_printf(sb, "%s", "STARTING"); else if (sc->sc_ndisks == g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE)) sbuf_printf(sb, "%s", "COMPLETE"); else sbuf_printf(sb, "%s", "DEGRADED"); sbuf_cat(sb, "\n"); } } static void g_mirror_shutdown_post_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_mirror_softc *sc; int error; if (panicstr != NULL) return; mp = arg; g_topology_lock(); g_mirror_shutdown = 1; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if ((sc = gp->softc) == NULL) continue; /* Skip synchronization geom. */ if (gp == sc->sc_sync.ds_geom) continue; g_topology_unlock(); sx_xlock(&sc->sc_lock); g_mirror_idle(sc, -1); g_cancel_event(sc); error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED); if (error != 0) sx_xunlock(&sc->sc_lock); g_topology_lock(); } g_topology_unlock(); } static void g_mirror_init(struct g_class *mp) { g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); if (g_mirror_post_sync == NULL) G_MIRROR_DEBUG(0, "Warning! 
Cannot register shutdown event."); } static void g_mirror_fini(struct g_class *mp) { if (g_mirror_post_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync); } /* * Refresh the mirror device's metadata when gmirror encounters a newer * generation as the individual components are being added to the mirror set. */ static int g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp, const struct g_mirror_metadata *md) { g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(sc->sc_genid <= md->md_genid, ("%s: attempted to refresh from stale component %s (device %s) " "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid, sc->sc_genid)); if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid && sc->sc_syncid >= md->md_syncid)) return (0); G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u " "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; " "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid, sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name); if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) { /* Probable data corruption detected */ G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state " "(device=%s genid=%u). A stale mirror device was launched.", g_mirror_device_state2str(sc->sc_state), sc->sc_name, sc->sc_genid); return (EINVAL); } /* Update softc */ g_mirror_reinit_from_metadata(sc, md); G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s " "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid, g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid, md->md_syncid, (unsigned)md->md_all); return (0); } DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); MODULE_VERSION(geom_mirror, 0); Index: head/sys/geom/mirror/g_mirror.h =================================================================== --- head/sys/geom/mirror/g_mirror.h (revision 350693) +++ head/sys/geom/mirror/g_mirror.h (revision 350694) @@ -1,518 +1,500 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _G_MIRROR_H_ #define _G_MIRROR_H_ #include #include #define G_MIRROR_CLASS_NAME "MIRROR" #define G_MIRROR_MAGIC "GEOM::MIRROR" /* * Version history: * 0 - Initial version number. * 1 - Added 'prefer' balance algorithm. * 2 - Added md_genid field to metadata. * 3 - Added md_provsize field to metadata. * 4 - Added 'no failure synchronization' flag. */ #define G_MIRROR_VERSION 4 #define G_MIRROR_BALANCE_NONE 0 #define G_MIRROR_BALANCE_ROUND_ROBIN 1 #define G_MIRROR_BALANCE_LOAD 2 #define G_MIRROR_BALANCE_SPLIT 3 #define G_MIRROR_BALANCE_PREFER 4 #define G_MIRROR_BALANCE_MIN G_MIRROR_BALANCE_NONE #define G_MIRROR_BALANCE_MAX G_MIRROR_BALANCE_PREFER #define G_MIRROR_DISK_FLAG_DIRTY 0x0000000000000001ULL #define G_MIRROR_DISK_FLAG_SYNCHRONIZING 0x0000000000000002ULL #define G_MIRROR_DISK_FLAG_FORCE_SYNC 0x0000000000000004ULL #define G_MIRROR_DISK_FLAG_INACTIVE 0x0000000000000008ULL #define G_MIRROR_DISK_FLAG_HARDCODED 0x0000000000000010ULL #define G_MIRROR_DISK_FLAG_BROKEN 0x0000000000000020ULL #define G_MIRROR_DISK_FLAG_CANDELETE 0x0000000000000040ULL /* Per-disk flags which are recorded in on-disk metadata. */ #define G_MIRROR_DISK_FLAG_MASK (G_MIRROR_DISK_FLAG_DIRTY | \ G_MIRROR_DISK_FLAG_SYNCHRONIZING | \ G_MIRROR_DISK_FLAG_FORCE_SYNC | \ G_MIRROR_DISK_FLAG_INACTIVE | \ G_MIRROR_DISK_FLAG_CANDELETE) #define G_MIRROR_DEVICE_FLAG_NOAUTOSYNC 0x0000000000000001ULL #define G_MIRROR_DEVICE_FLAG_NOFAILSYNC 0x0000000000000002ULL /* Mirror flags which are recorded in on-disk metadata. */ #define G_MIRROR_DEVICE_FLAG_MASK (G_MIRROR_DEVICE_FLAG_NOAUTOSYNC | \ G_MIRROR_DEVICE_FLAG_NOFAILSYNC) #ifdef _KERNEL #define G_MIRROR_DEVICE_FLAG_DESTROY 0x0100000000000000ULL #define G_MIRROR_DEVICE_FLAG_DRAIN 0x0200000000000000ULL #define G_MIRROR_DEVICE_FLAG_CLOSEWAIT 0x0400000000000000ULL #define G_MIRROR_DEVICE_FLAG_TASTING 0x0800000000000000ULL #define G_MIRROR_DEVICE_FLAG_WIPE 0x1000000000000000ULL extern int g_mirror_debug; -#define G_MIRROR_DEBUG(lvl, ...) do { \ - if (g_mirror_debug >= (lvl)) { \ - printf("GEOM_MIRROR"); \ - if (g_mirror_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_MIRROR_LOGREQ(lvl, bp, ...) do { \ - if (g_mirror_debug >= (lvl)) { \ - printf("GEOM_MIRROR"); \ - if (g_mirror_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_MIRROR_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_MIRROR", g_mirror_debug, (lvl), NULL, __VA_ARGS__) +#define G_MIRROR_LOGREQ(lvl, bp, ...) \ + _GEOM_DEBUG("GEOM_MIRROR", g_mirror_debug, (lvl), (bp), __VA_ARGS__) #define G_MIRROR_BIO_FLAG_REGULAR 0x01 #define G_MIRROR_BIO_FLAG_SYNC 0x02 /* * Informations needed for synchronization. */ struct g_mirror_disk_sync { struct g_consumer *ds_consumer; /* Consumer connected to our mirror. */ off_t ds_offset; /* Offset of next request to send. */ off_t ds_offset_done; /* Offset of already synchronized region. */ time_t ds_update_ts; /* Time of last metadata update. */ u_int ds_syncid; /* Disk's synchronization ID. */ u_int ds_inflight; /* Number of in-flight sync requests. */ struct bio **ds_bios; /* BIOs for synchronization I/O. */ }; /* * Informations needed for synchronization. */ struct g_mirror_device_sync { struct g_geom *ds_geom; /* Synchronization geom. */ u_int ds_ndisks; /* Number of disks in SYNCHRONIZING state. 
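 * (ds_geom is the "<name>.sync" companion geom set up next to the main
 * geom in g_mirror_create() above.)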
*/ }; #define G_MIRROR_DISK_STATE_NONE 0 #define G_MIRROR_DISK_STATE_NEW 1 #define G_MIRROR_DISK_STATE_ACTIVE 2 #define G_MIRROR_DISK_STATE_STALE 3 #define G_MIRROR_DISK_STATE_SYNCHRONIZING 4 #define G_MIRROR_DISK_STATE_DISCONNECTED 5 #define G_MIRROR_DISK_STATE_DESTROY 6 struct g_mirror_disk { uint32_t d_id; /* Disk ID. */ struct g_consumer *d_consumer; /* Consumer. */ struct g_mirror_softc *d_softc; /* Back-pointer to softc. */ int d_state; /* Disk state. */ u_int d_priority; /* Disk priority. */ u_int load; /* Averaged queue length */ off_t d_last_offset; /* Last read offset */ uint64_t d_flags; /* Additional flags. */ u_int d_genid; /* Disk's generation ID. */ struct g_mirror_disk_sync d_sync;/* Sync information. */ LIST_ENTRY(g_mirror_disk) d_next; u_int d_init_ndisks; /* Initial number of mirror components */ uint32_t d_init_slice; /* Initial slice size */ uint8_t d_init_balance;/* Initial balance */ uint64_t d_init_mediasize;/* Initial mediasize */ }; #define d_name d_consumer->provider->name #define G_MIRROR_EVENT_DONTWAIT 0x1 #define G_MIRROR_EVENT_WAIT 0x2 #define G_MIRROR_EVENT_DEVICE 0x4 #define G_MIRROR_EVENT_DONE 0x8 struct g_mirror_event { struct g_mirror_disk *e_disk; int e_state; int e_flags; int e_error; TAILQ_ENTRY(g_mirror_event) e_next; }; #define G_MIRROR_DEVICE_STATE_STARTING 0 #define G_MIRROR_DEVICE_STATE_RUNNING 1 #define G_MIRROR_TYPE_MANUAL 0 #define G_MIRROR_TYPE_AUTOMATIC 1 /* Bump syncid on first write. */ #define G_MIRROR_BUMP_SYNCID 0x1 /* Bump genid immediately. */ #define G_MIRROR_BUMP_GENID 0x2 /* Bump syncid immediately. */ #define G_MIRROR_BUMP_SYNCID_NOW 0x4 struct g_mirror_softc { u_int sc_type; /* Device type (manual/automatic). */ u_int sc_state; /* Device state. */ uint32_t sc_slice; /* Slice size. */ uint8_t sc_balance; /* Balance algorithm. */ uint64_t sc_mediasize; /* Device size. */ uint32_t sc_sectorsize; /* Sector size. */ uint64_t sc_flags; /* Additional flags. */ struct g_geom *sc_geom; struct g_provider *sc_provider; int sc_provider_open; uint32_t sc_id; /* Mirror unique ID. */ struct sx sc_lock; struct bio_queue sc_queue; struct mtx sc_queue_mtx; struct proc *sc_worker; struct bio_queue sc_inflight; /* In-flight regular write requests. */ struct bio_queue sc_regular_delayed; /* Delayed I/O requests due to collision with sync requests. */ struct bio_queue sc_sync_delayed; /* Delayed sync requests due to collision with regular requests. */ LIST_HEAD(, g_mirror_disk) sc_disks; u_int sc_ndisks; /* Number of disks. */ struct g_mirror_disk *sc_hint; u_int sc_genid; /* Generation ID. */ u_int sc_syncid; /* Synchronization ID. */ int sc_bump_id; struct g_mirror_device_sync sc_sync; int sc_idle; /* DIRTY flags removed. 
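 * (Initialized to 1 in g_mirror_create() above; the shutdown hook calls
 * g_mirror_idle() before tearing the device down.)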
*/ time_t sc_last_write; u_int sc_writes; u_int sc_refcnt; /* Number of softc references */ TAILQ_HEAD(, g_mirror_event) sc_events; struct mtx sc_events_mtx; struct callout sc_callout; struct root_hold_token *sc_rootmount; struct mtx sc_done_mtx; }; #define sc_name sc_geom->name struct g_mirror_metadata; u_int g_mirror_ndisks(struct g_mirror_softc *sc, int state); struct g_geom * g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, u_int type); #define G_MIRROR_DESTROY_SOFT 0 #define G_MIRROR_DESTROY_DELAYED 1 #define G_MIRROR_DESTROY_HARD 2 int g_mirror_destroy(struct g_mirror_softc *sc, int how); int g_mirror_event_send(void *arg, int state, int flags); struct g_mirror_metadata; int g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp, struct g_mirror_metadata *md); int g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md); void g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk, struct g_mirror_metadata *md); void g_mirror_update_metadata(struct g_mirror_disk *disk); g_ctl_req_t g_mirror_config; #endif /* _KERNEL */ struct g_mirror_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ char md_name[16]; /* Mirror name. */ uint32_t md_mid; /* Mirror unique ID. */ uint32_t md_did; /* Disk unique ID. */ uint8_t md_all; /* Number of disks in mirror. */ uint32_t md_genid; /* Generation ID. */ uint32_t md_syncid; /* Synchronization ID. */ uint8_t md_priority; /* Disk priority. */ uint32_t md_slice; /* Slice size. */ uint8_t md_balance; /* Balance type. */ uint64_t md_mediasize; /* Size of the smallest disk in mirror. */ uint32_t md_sectorsize; /* Sector size. */ uint64_t md_sync_offset; /* Synchronized offset. */ uint64_t md_mflags; /* Additional mirror flags. */ uint64_t md_dflags; /* Additional disk flags. */ char md_provider[16]; /* Hardcoded provider. */ uint64_t md_provsize; /* Provider's size. */ u_char md_hash[16]; /* MD5 hash. 
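 *
 * A sketch of the current (version 3/4) on-disk layout, as implied by
 * mirror_metadata_encode() below; byte offsets into the metadata sector,
 * with the MD5 hash computed over bytes 0-118:
 *
 *     0 md_magic[16]      16 md_version        20 md_name[16]
 *    36 md_mid            40 md_did            44 md_all
 *    45 md_genid          49 md_syncid         53 md_priority
 *    54 md_slice          58 md_balance        59 md_mediasize
 *    67 md_sectorsize     71 md_sync_offset    79 md_mflags
 *    87 md_dflags         95 md_provider[16]  111 md_provsize
 *   119 md_hash[16]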
*/ }; static __inline void mirror_metadata_encode(struct g_mirror_metadata *md, u_char *data) { MD5_CTX ctx; bcopy(md->md_magic, data, 16); le32enc(data + 16, md->md_version); bcopy(md->md_name, data + 20, 16); le32enc(data + 36, md->md_mid); le32enc(data + 40, md->md_did); *(data + 44) = md->md_all; le32enc(data + 45, md->md_genid); le32enc(data + 49, md->md_syncid); *(data + 53) = md->md_priority; le32enc(data + 54, md->md_slice); *(data + 58) = md->md_balance; le64enc(data + 59, md->md_mediasize); le32enc(data + 67, md->md_sectorsize); le64enc(data + 71, md->md_sync_offset); le64enc(data + 79, md->md_mflags); le64enc(data + 87, md->md_dflags); bcopy(md->md_provider, data + 95, 16); le64enc(data + 111, md->md_provsize); MD5Init(&ctx); MD5Update(&ctx, data, 119); MD5Final(md->md_hash, &ctx); bcopy(md->md_hash, data + 119, 16); } static __inline int mirror_metadata_decode_v0v1(const u_char *data, struct g_mirror_metadata *md) { MD5_CTX ctx; bcopy(data + 20, md->md_name, 16); md->md_mid = le32dec(data + 36); md->md_did = le32dec(data + 40); md->md_all = *(data + 44); md->md_syncid = le32dec(data + 45); md->md_priority = *(data + 49); md->md_slice = le32dec(data + 50); md->md_balance = *(data + 54); md->md_mediasize = le64dec(data + 55); md->md_sectorsize = le32dec(data + 63); md->md_sync_offset = le64dec(data + 67); md->md_mflags = le64dec(data + 75); md->md_dflags = le64dec(data + 83); bcopy(data + 91, md->md_provider, 16); bcopy(data + 107, md->md_hash, 16); MD5Init(&ctx); MD5Update(&ctx, data, 107); MD5Final(md->md_hash, &ctx); if (bcmp(md->md_hash, data + 107, 16) != 0) return (EINVAL); /* New fields. */ md->md_genid = 0; md->md_provsize = 0; return (0); } static __inline int mirror_metadata_decode_v2(const u_char *data, struct g_mirror_metadata *md) { MD5_CTX ctx; bcopy(data + 20, md->md_name, 16); md->md_mid = le32dec(data + 36); md->md_did = le32dec(data + 40); md->md_all = *(data + 44); md->md_genid = le32dec(data + 45); md->md_syncid = le32dec(data + 49); md->md_priority = *(data + 53); md->md_slice = le32dec(data + 54); md->md_balance = *(data + 58); md->md_mediasize = le64dec(data + 59); md->md_sectorsize = le32dec(data + 67); md->md_sync_offset = le64dec(data + 71); md->md_mflags = le64dec(data + 79); md->md_dflags = le64dec(data + 87); bcopy(data + 95, md->md_provider, 16); bcopy(data + 111, md->md_hash, 16); MD5Init(&ctx); MD5Update(&ctx, data, 111); MD5Final(md->md_hash, &ctx); if (bcmp(md->md_hash, data + 111, 16) != 0) return (EINVAL); /* New fields. 
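 * (md_provsize was introduced in metadata version 3, per the version
 * history above, so version 2 metadata simply has it zeroed here.)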
*/ md->md_provsize = 0; return (0); } static __inline int mirror_metadata_decode_v3v4(const u_char *data, struct g_mirror_metadata *md) { MD5_CTX ctx; bcopy(data + 20, md->md_name, 16); md->md_mid = le32dec(data + 36); md->md_did = le32dec(data + 40); md->md_all = *(data + 44); md->md_genid = le32dec(data + 45); md->md_syncid = le32dec(data + 49); md->md_priority = *(data + 53); md->md_slice = le32dec(data + 54); md->md_balance = *(data + 58); md->md_mediasize = le64dec(data + 59); md->md_sectorsize = le32dec(data + 67); md->md_sync_offset = le64dec(data + 71); md->md_mflags = le64dec(data + 79); md->md_dflags = le64dec(data + 87); bcopy(data + 95, md->md_provider, 16); md->md_provsize = le64dec(data + 111); bcopy(data + 119, md->md_hash, 16); MD5Init(&ctx); MD5Update(&ctx, data, 119); MD5Final(md->md_hash, &ctx); if (bcmp(md->md_hash, data + 119, 16) != 0) return (EINVAL); return (0); } static __inline int mirror_metadata_decode(const u_char *data, struct g_mirror_metadata *md) { int error; bcopy(data, md->md_magic, 16); md->md_version = le32dec(data + 16); switch (md->md_version) { case 0: case 1: error = mirror_metadata_decode_v0v1(data, md); break; case 2: error = mirror_metadata_decode_v2(data, md); break; case 3: case 4: error = mirror_metadata_decode_v3v4(data, md); break; default: error = EINVAL; break; } return (error); } static __inline const char * balance_name(u_int balance) { static const char *algorithms[] = { [G_MIRROR_BALANCE_NONE] = "none", [G_MIRROR_BALANCE_ROUND_ROBIN] = "round-robin", [G_MIRROR_BALANCE_LOAD] = "load", [G_MIRROR_BALANCE_SPLIT] = "split", [G_MIRROR_BALANCE_PREFER] = "prefer", [G_MIRROR_BALANCE_MAX + 1] = "unknown" }; if (balance > G_MIRROR_BALANCE_MAX) balance = G_MIRROR_BALANCE_MAX + 1; return (algorithms[balance]); } static __inline int balance_id(const char *name) { static const char *algorithms[] = { [G_MIRROR_BALANCE_NONE] = "none", [G_MIRROR_BALANCE_ROUND_ROBIN] = "round-robin", [G_MIRROR_BALANCE_LOAD] = "load", [G_MIRROR_BALANCE_SPLIT] = "split", [G_MIRROR_BALANCE_PREFER] = "prefer" }; int n; for (n = G_MIRROR_BALANCE_MIN; n <= G_MIRROR_BALANCE_MAX; n++) { if (strcmp(name, algorithms[n]) == 0) return (n); } return (-1); } static __inline void mirror_metadata_dump(const struct g_mirror_metadata *md) { static const char hex[] = "0123456789abcdef"; char hash[16 * 2 + 1]; u_int i; printf(" magic: %s\n", md->md_magic); printf(" version: %u\n", (u_int)md->md_version); printf(" name: %s\n", md->md_name); printf(" mid: %u\n", (u_int)md->md_mid); printf(" did: %u\n", (u_int)md->md_did); printf(" all: %u\n", (u_int)md->md_all); printf(" genid: %u\n", (u_int)md->md_genid); printf(" syncid: %u\n", (u_int)md->md_syncid); printf(" priority: %u\n", (u_int)md->md_priority); printf(" slice: %u\n", (u_int)md->md_slice); printf(" balance: %s\n", balance_name((u_int)md->md_balance)); printf(" mediasize: %jd\n", (intmax_t)md->md_mediasize); printf("sectorsize: %u\n", (u_int)md->md_sectorsize); printf("syncoffset: %jd\n", (intmax_t)md->md_sync_offset); printf(" mflags:"); if (md->md_mflags == 0) printf(" NONE"); else { if ((md->md_mflags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) printf(" NOFAILSYNC"); if ((md->md_mflags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0) printf(" NOAUTOSYNC"); } printf("\n"); printf(" dflags:"); if (md->md_dflags == 0) printf(" NONE"); else { if ((md->md_dflags & G_MIRROR_DISK_FLAG_DIRTY) != 0) printf(" DIRTY"); if ((md->md_dflags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) printf(" SYNCHRONIZING"); if ((md->md_dflags & G_MIRROR_DISK_FLAG_FORCE_SYNC) 
!= 0) printf(" FORCE_SYNC"); if ((md->md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) printf(" INACTIVE"); } printf("\n"); printf("hcprovider: %s\n", md->md_provider); printf(" provsize: %ju\n", (uintmax_t)md->md_provsize); bzero(hash, sizeof(hash)); for (i = 0; i < 16; i++) { hash[i * 2] = hex[md->md_hash[i] >> 4]; hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f]; } printf(" MD5 hash: %s\n", hash); } #endif /* !_G_MIRROR_H_ */ Index: head/sys/geom/mirror/g_mirror_ctl.c =================================================================== --- head/sys/geom/mirror/g_mirror_ctl.c (revision 350693) +++ head/sys/geom/mirror/g_mirror_ctl.c (revision 350694) @@ -1,1090 +1,1091 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2009 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include +#include #include #include static struct g_mirror_softc * g_mirror_find_device(struct g_class *mp, const char *name) { struct g_mirror_softc *sc; struct g_geom *gp; g_topology_lock(); LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) continue; if (strcmp(gp->name, name) == 0 || strcmp(sc->sc_name, name) == 0) { g_topology_unlock(); sx_xlock(&sc->sc_lock); return (sc); } } g_topology_unlock(); return (NULL); } static struct g_mirror_disk * g_mirror_find_disk(struct g_mirror_softc *sc, const char *name) { struct g_mirror_disk *disk; sx_assert(&sc->sc_lock, SX_XLOCKED); if (strncmp(name, "/dev/", 5) == 0) name += 5; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer == NULL) continue; if (disk->d_consumer->provider == NULL) continue; if (strcmp(disk->d_consumer->provider->name, name) == 0) return (disk); } return (NULL); } static void g_mirror_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; const char *name, *balancep, *prov; intmax_t *slicep, *priority; uint32_t slice; uint8_t balance; int *autosync, *noautosync, *failsync, *nofailsync, *hardcode, *dynamic; int *nargs, do_sync = 0, dirty = 1, do_priority = 0; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs != 1 && *nargs != 2) { gctl_error(req, "Invalid number of arguments."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } balancep = gctl_get_asciiparam(req, "balance"); if (balancep == NULL) { gctl_error(req, "No '%s' argument.", "balance"); return; } autosync = gctl_get_paraml(req, "autosync", sizeof(*autosync)); if (autosync == NULL) { gctl_error(req, "No '%s' argument.", "autosync"); return; } noautosync = gctl_get_paraml(req, "noautosync", sizeof(*noautosync)); if (noautosync == NULL) { gctl_error(req, "No '%s' argument.", "noautosync"); return; } failsync = gctl_get_paraml(req, "failsync", sizeof(*failsync)); if (failsync == NULL) { gctl_error(req, "No '%s' argument.", "failsync"); return; } nofailsync = gctl_get_paraml(req, "nofailsync", sizeof(*nofailsync)); if (nofailsync == NULL) { gctl_error(req, "No '%s' argument.", "nofailsync"); return; } hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); if (hardcode == NULL) { gctl_error(req, "No '%s' argument.", "hardcode"); return; } dynamic = gctl_get_paraml(req, "dynamic", sizeof(*dynamic)); if (dynamic == NULL) { gctl_error(req, "No '%s' argument.", "dynamic"); return; } priority = gctl_get_paraml(req, "priority", sizeof(*priority)); if (priority == NULL) { gctl_error(req, "No '%s' argument.", "priority"); return; } if (*priority < -1 || *priority > 255) { gctl_error(req, "Priority range is 0 to 255, %jd given", *priority); return; } /* * Since we have a priority, we also need a provider now. * Note: be WARNS safe, by always assigning prov and only throw an * error if *priority != -1. 
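 *
 * For example (assuming the stock gmirror(8) syntax), a priority change
 * arrives here as something like "gmirror configure -p 2 gm0 ada1":
 * arg0 names the mirror, arg1 the component whose priority is changed,
 * and the checks further down reject any other configure option given
 * together with -p.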
*/ prov = gctl_get_asciiparam(req, "arg1"); if (*priority > -1) { if (prov == NULL) { gctl_error(req, "Priority needs a disk name"); return; } do_priority = 1; } if (*autosync && *noautosync) { gctl_error(req, "'%s' and '%s' specified.", "autosync", "noautosync"); return; } if (*failsync && *nofailsync) { gctl_error(req, "'%s' and '%s' specified.", "failsync", "nofailsync"); return; } if (*hardcode && *dynamic) { gctl_error(req, "'%s' and '%s' specified.", "hardcode", "dynamic"); return; } sc = g_mirror_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } if (*balancep == '\0') balance = sc->sc_balance; else { if (balance_id(balancep) == -1) { gctl_error(req, "Invalid balance algorithm."); sx_xunlock(&sc->sc_lock); return; } balance = balance_id(balancep); } slicep = gctl_get_paraml(req, "slice", sizeof(*slicep)); if (slicep == NULL) { gctl_error(req, "No '%s' argument.", "slice"); sx_xunlock(&sc->sc_lock); return; } if (*slicep == -1) slice = sc->sc_slice; else slice = *slicep; /* Enforce usage() of -p not allowing any other options. */ if (do_priority && (*autosync || *noautosync || *failsync || *nofailsync || *hardcode || *dynamic || *slicep != -1 || *balancep != '\0')) { sx_xunlock(&sc->sc_lock); gctl_error(req, "only -p accepted when setting priority"); return; } if (sc->sc_balance == balance && sc->sc_slice == slice && !*autosync && !*noautosync && !*failsync && !*nofailsync && !*hardcode && !*dynamic && !do_priority) { sx_xunlock(&sc->sc_lock); gctl_error(req, "Nothing has changed."); return; } if ((!do_priority && *nargs != 1) || (do_priority && *nargs != 2)) { sx_xunlock(&sc->sc_lock); gctl_error(req, "Invalid number of arguments."); return; } if (g_mirror_ndisks(sc, -1) < sc->sc_ndisks) { sx_xunlock(&sc->sc_lock); gctl_error(req, "Not all disks connected. Try 'forget' command " "first."); return; } sc->sc_balance = balance; sc->sc_slice = slice; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0) { if (*autosync) { sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_NOAUTOSYNC; do_sync = 1; } } else { if (*noautosync) sc->sc_flags |= G_MIRROR_DEVICE_FLAG_NOAUTOSYNC; } if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) { if (*failsync) sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_NOFAILSYNC; } else { if (*nofailsync) { sc->sc_flags |= G_MIRROR_DEVICE_FLAG_NOFAILSYNC; dirty = 0; } } LIST_FOREACH(disk, &sc->sc_disks, d_next) { /* * Handle priority first, since we only need one disk, do one * operation on it and then we're done. No need to check other * flags, as usage doesn't allow it. 
*/ if (do_priority) { if (strcmp(disk->d_name, prov) == 0) { if (disk->d_priority == *priority) gctl_error(req, "Nothing has changed."); else { disk->d_priority = *priority; g_mirror_update_metadata(disk); } break; } continue; } if (do_sync) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC; } if (*hardcode) disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; else if (*dynamic) disk->d_flags &= ~G_MIRROR_DISK_FLAG_HARDCODED; if (!dirty) disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; g_mirror_update_metadata(disk); if (do_sync) { if (disk->d_state == G_MIRROR_DISK_STATE_STALE) { g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); } } } sx_xunlock(&sc->sc_lock); } static void g_mirror_create_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while creating %s.", __func__, cp->provider->name)); } static void g_mirror_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_mirror_metadata md; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; struct g_mirror_softc *sc; struct sbuf *sb; const char *name; char param[16]; int *nargs; intmax_t *val; int *ival; const char *sval; int bal; unsigned attached, no, sectorsize; off_t mediasize; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 2) { gctl_error(req, "Too few arguments."); return; } strlcpy(md.md_magic, G_MIRROR_MAGIC, sizeof(md.md_magic)); md.md_version = G_MIRROR_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_mid = arc4random(); md.md_all = *nargs - 1; md.md_genid = 0; md.md_syncid = 1; md.md_sync_offset = 0; val = gctl_get_paraml(req, "slice", sizeof(*val)); if (val == NULL) { gctl_error(req, "No slice argument."); return; } md.md_slice = *val; sval = gctl_get_asciiparam(req, "balance"); if (sval == NULL) { gctl_error(req, "No balance argument."); return; } bal = balance_id(sval); if (bal < 0) { gctl_error(req, "Invalid balance algorithm."); return; } md.md_balance = bal; md.md_mflags = 0; md.md_dflags = 0; ival = gctl_get_paraml(req, "noautosync", sizeof(*ival)); if (ival != NULL && *ival) md.md_mflags |= G_MIRROR_DEVICE_FLAG_NOAUTOSYNC; ival = gctl_get_paraml(req, "nofailsync", sizeof(*ival)); if (ival != NULL && *ival) md.md_mflags |= G_MIRROR_DEVICE_FLAG_NOFAILSYNC; /* These fields not used in manual mode. 
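 * ("Manual" mirrors are assembled through this control request rather
 * than tasted from on-disk metadata; g_mirror_taste() above matches only
 * G_MIRROR_TYPE_AUTOMATIC devices, so a hardcoded provider name or size
 * would never be consulted for them.)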
*/ bzero(md.md_provider, sizeof(md.md_provider)); md.md_provsize = 0; g_topology_lock(); mediasize = OFF_MAX; sectorsize = 0; gp = g_new_geomf(mp, "%s", md.md_name); gp->orphan = g_mirror_create_orphan; cp = g_new_consumer(gp); for (no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); err: g_destroy_consumer(cp); g_destroy_geom(gp); g_topology_unlock(); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_MIRROR_DEBUG(1, "Disk %s is invalid.", name); gctl_error(req, "Disk %s is invalid.", name); goto err; } g_attach(cp, pp); if (g_access(cp, 1, 0, 0) != 0) { G_MIRROR_DEBUG(1, "Can't open disk %s.", name); gctl_error(req, "Can't open disk %s.", name); err2: g_detach(cp); goto err; } if (pp->mediasize == 0 || pp->sectorsize == 0) { G_MIRROR_DEBUG(1, "Disk %s has no media.", name); gctl_error(req, "Disk %s has no media.", name); g_access(cp, -1, 0, 0); goto err2; } if (pp->mediasize < mediasize) mediasize = pp->mediasize; if (pp->sectorsize > sectorsize) sectorsize = pp->sectorsize; g_access(cp, -1, 0, 0); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); md.md_mediasize = mediasize; md.md_sectorsize = sectorsize; md.md_mediasize -= (md.md_mediasize % md.md_sectorsize); gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't create %s.", md.md_name); g_topology_unlock(); return; } sc = gp->softc; g_topology_unlock(); sx_xlock(&sc->sc_lock); sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING; sb = sbuf_new_auto(); sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); for (attached = 0, no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_MIRROR_DEBUG(1, "Provider %s disappear?!", name); sbuf_printf(sb, " %s", name); continue; } md.md_did = arc4random(); md.md_priority = no - 1; if (g_mirror_add_disk(sc, pp, &md) != 0) { G_MIRROR_DEBUG(1, "Disk %u (%s) not attached to %s.", no, pp->name, gp->name); sbuf_printf(sb, " %s", pp->name); continue; } attached++; } sbuf_finish(sb); sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING; if (md.md_all != attached || (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_HARD); gctl_error(req, "%s", sbuf_data(sb)); } else sx_xunlock(&sc->sc_lock); sbuf_delete(sb); } static void g_mirror_ctl_rebuild(struct gctl_req *req, struct g_class *mp) { struct g_mirror_metadata md; struct g_mirror_softc *sc; struct g_mirror_disk *disk; struct g_provider *pp; const char *name; char param[16]; int error, *nargs; u_int i; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Too few arguments."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } sc = g_mirror_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } for (i = 1; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); continue; } disk = g_mirror_find_disk(sc, name); if (disk == NULL) { 
gctl_error(req, "No such provider: %s.", name); continue; } if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1 && disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) { /* * This is the last active disk. There will be nothing * to rebuild it from, so deny this request. */ gctl_error(req, "Provider %s is the last active provider in %s.", name, sc->sc_geom->name); break; } /* * Do rebuild by resetting syncid, disconnecting the disk and * connecting it again. */ disk->d_sync.ds_syncid = 0; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0) disk->d_flags |= G_MIRROR_DISK_FLAG_FORCE_SYNC; g_mirror_update_metadata(disk); pp = disk->d_consumer->provider; g_topology_lock(); error = g_mirror_read_metadata(disk->d_consumer, &md); g_topology_unlock(); g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_WAIT); if (error != 0) { gctl_error(req, "Cannot read metadata from %s.", pp->name); continue; } error = g_mirror_add_disk(sc, pp, &md); if (error != 0) { gctl_error(req, "Cannot reconnect component %s.", pp->name); continue; } } sx_xunlock(&sc->sc_lock); } static void g_mirror_ctl_insert(struct gctl_req *req, struct g_class *mp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; struct g_mirror_metadata md; struct g_provider *pp; struct g_consumer *cp; intmax_t *priority; const char *name; char param[16]; u_char *sector; u_int i, n; int error, *nargs, *hardcode, *inactive; struct { struct g_provider *provider; struct g_consumer *consumer; } *disks; off_t mdsize; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Too few arguments."); return; } priority = gctl_get_paraml(req, "priority", sizeof(*priority)); if (priority == NULL) { gctl_error(req, "No '%s' argument.", "priority"); return; } inactive = gctl_get_paraml(req, "inactive", sizeof(*inactive)); if (inactive == NULL) { gctl_error(req, "No '%s' argument.", "inactive"); return; } hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); if (hardcode == NULL) { gctl_error(req, "No '%s' argument.", "hardcode"); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } sc = g_mirror_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } if (g_mirror_ndisks(sc, -1) < sc->sc_ndisks) { gctl_error(req, "Not all disks connected."); sx_xunlock(&sc->sc_lock); return; } disks = g_malloc(sizeof(*disks) * (*nargs), M_WAITOK | M_ZERO); g_topology_lock(); for (i = 1, n = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); continue; } if (g_mirror_find_disk(sc, name) != NULL) { gctl_error(req, "Provider %s already inserted.", name); continue; } if (strncmp(name, "/dev/", 5) == 0) name += 5; pp = g_provider_by_name(name); if (pp == NULL) { gctl_error(req, "Unknown provider %s.", name); continue; } cp = g_new_consumer(sc->sc_geom); if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); gctl_error(req, "Cannot attach to provider %s.", name); continue; } if (g_access(cp, 0, 1, 1) != 0) { gctl_error(req, "Cannot access provider %s.", name); err: g_detach(cp); g_destroy_consumer(cp); continue; } mdsize = (sc->sc_type == G_MIRROR_TYPE_AUTOMATIC) ? 
pp->sectorsize : 0; if (sc->sc_provider->mediasize > pp->mediasize - mdsize) { gctl_error(req, "Provider %s too small.", name); err2: g_access(cp, 0, -1, -1); goto err; } if ((sc->sc_provider->sectorsize % pp->sectorsize) != 0) { gctl_error(req, "Invalid sectorsize of provider %s.", name); goto err2; } if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC) { g_access(cp, 0, -1, -1); g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); sc->sc_ndisks++; g_mirror_fill_metadata(sc, NULL, &md); md.md_priority = *priority; if (*inactive) md.md_dflags |= G_MIRROR_DISK_FLAG_INACTIVE; if (g_mirror_add_disk(sc, pp, &md) != 0) { sc->sc_ndisks--; gctl_error(req, "Disk %s not inserted.", name); } g_topology_lock(); continue; } disks[n].provider = pp; disks[n].consumer = cp; n++; } if (n == 0) { g_topology_unlock(); sx_xunlock(&sc->sc_lock); g_free(disks); return; } sc->sc_ndisks += n; again: for (i = 0; i < n; i++) { if (disks[i].consumer == NULL) continue; g_mirror_fill_metadata(sc, NULL, &md); md.md_priority = *priority; if (*inactive) md.md_dflags |= G_MIRROR_DISK_FLAG_INACTIVE; pp = disks[i].provider; if (*hardcode) { strlcpy(md.md_provider, pp->name, sizeof(md.md_provider)); } else { bzero(md.md_provider, sizeof(md.md_provider)); } md.md_provsize = pp->mediasize; sector = g_malloc(pp->sectorsize, M_WAITOK); mirror_metadata_encode(&md, sector); error = g_write_data(disks[i].consumer, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); g_free(sector); if (error != 0) { gctl_error(req, "Cannot store metadata on %s.", pp->name); g_access(disks[i].consumer, 0, -1, -1); g_detach(disks[i].consumer); g_destroy_consumer(disks[i].consumer); disks[i].consumer = NULL; disks[i].provider = NULL; sc->sc_ndisks--; goto again; } } g_topology_unlock(); if (i == 0) { /* All writes failed. */ sx_xunlock(&sc->sc_lock); g_free(disks); return; } LIST_FOREACH(disk, &sc->sc_disks, d_next) { g_mirror_update_metadata(disk); } /* * Release provider and wait for retaste. */ g_topology_lock(); for (i = 0; i < n; i++) { if (disks[i].consumer == NULL) continue; g_access(disks[i].consumer, 0, -1, -1); g_detach(disks[i].consumer); g_destroy_consumer(disks[i].consumer); } g_topology_unlock(); sx_xunlock(&sc->sc_lock); g_free(disks); } static void g_mirror_ctl_remove(struct gctl_req *req, struct g_class *mp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; const char *name; char param[16]; int *nargs; u_int i, active; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Too few arguments."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } sc = g_mirror_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } if (g_mirror_ndisks(sc, -1) < sc->sc_ndisks) { sx_xunlock(&sc->sc_lock); gctl_error(req, "Not all disks connected. 
Try 'forget' command " "first."); return; } active = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE); for (i = 1; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); continue; } disk = g_mirror_find_disk(sc, name); if (disk == NULL) { gctl_error(req, "No such provider: %s.", name); continue; } if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) { if (active > 1) active--; else { gctl_error(req, "%s: Can't remove the last " "ACTIVE component %s.", sc->sc_geom->name, name); continue; } } g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DESTROY, G_MIRROR_EVENT_DONTWAIT); } sx_xunlock(&sc->sc_lock); } static void g_mirror_ctl_resize(struct gctl_req *req, struct g_class *mp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; uint64_t mediasize; const char *name, *s; char *x; int *nargs; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Missing device."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } s = gctl_get_asciiparam(req, "size"); if (s == NULL) { gctl_error(req, "No '%s' argument.", "size"); return; } mediasize = strtouq(s, &x, 0); if (*x != '\0' || mediasize == 0) { gctl_error(req, "Invalid '%s' argument.", "size"); return; } sc = g_mirror_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } /* Deny shrinking of an opened provider */ if ((g_debugflags & 16) == 0 && sc->sc_provider_open > 0) { if (sc->sc_mediasize > mediasize) { gctl_error(req, "Device %s is busy.", sc->sc_provider->name); sx_xunlock(&sc->sc_lock); return; } } LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (mediasize > disk->d_consumer->provider->mediasize - disk->d_consumer->provider->sectorsize) { gctl_error(req, "Provider %s is too small.", disk->d_name); sx_xunlock(&sc->sc_lock); return; } } /* Update the size. 
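 * The new size is written into every component's metadata and then
 * propagated with g_resize_provider(); the checks above refuse to shrink
 * an open provider (unless the relevant g_debugflags bit is set) or to
 * grow beyond what the smallest component can hold once its metadata
 * sector is reserved.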
*/ sc->sc_mediasize = mediasize; LIST_FOREACH(disk, &sc->sc_disks, d_next) { g_mirror_update_metadata(disk); } g_topology_lock(); g_resize_provider(sc->sc_provider, mediasize); g_topology_unlock(); sx_xunlock(&sc->sc_lock); } static void g_mirror_ctl_deactivate(struct gctl_req *req, struct g_class *mp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; const char *name; char param[16]; int *nargs; u_int i, active; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Too few arguments."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } sc = g_mirror_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } active = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE); for (i = 1; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); continue; } disk = g_mirror_find_disk(sc, name); if (disk == NULL) { gctl_error(req, "No such provider: %s.", name); continue; } if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) { if (active > 1) active--; else { gctl_error(req, "%s: Can't deactivate the " "last ACTIVE component %s.", sc->sc_geom->name, name); continue; } } disk->d_flags |= G_MIRROR_DISK_FLAG_INACTIVE; disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC; g_mirror_update_metadata(disk); sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); } sx_xunlock(&sc->sc_lock); } static void g_mirror_ctl_forget(struct gctl_req *req, struct g_class *mp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; const char *name; char param[16]; int *nargs; u_int i; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs < 1) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_mirror_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } if (g_mirror_ndisks(sc, -1) == sc->sc_ndisks) { sx_xunlock(&sc->sc_lock); G_MIRROR_DEBUG(1, "All disks connected in %s, skipping.", sc->sc_name); continue; } sc->sc_ndisks = g_mirror_ndisks(sc, -1); LIST_FOREACH(disk, &sc->sc_disks, d_next) { g_mirror_update_metadata(disk); } sx_xunlock(&sc->sc_lock); } } static void g_mirror_ctl_stop(struct gctl_req *req, struct g_class *mp, int wipe) { struct g_mirror_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; int how; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs < 1) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } if (*force) how = G_MIRROR_DESTROY_HARD; else how = G_MIRROR_DESTROY_SOFT; for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_mirror_find_device(mp, name); if (sc == NULL) { 
gctl_error(req, "No such device: %s.", name); return; } g_cancel_event(sc); if (wipe) sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WIPE; error = g_mirror_destroy(sc, how); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_geom->name, error); if (wipe) sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_WIPE; sx_xunlock(&sc->sc_lock); return; } /* No need to unlock, because lock is already dead. */ } } void g_mirror_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_MIRROR_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } g_topology_unlock(); if (strcmp(verb, "configure") == 0) g_mirror_ctl_configure(req, mp); else if (strcmp(verb, "create") == 0) g_mirror_ctl_create(req, mp); else if (strcmp(verb, "rebuild") == 0) g_mirror_ctl_rebuild(req, mp); else if (strcmp(verb, "insert") == 0) g_mirror_ctl_insert(req, mp); else if (strcmp(verb, "remove") == 0) g_mirror_ctl_remove(req, mp); else if (strcmp(verb, "resize") == 0) g_mirror_ctl_resize(req, mp); else if (strcmp(verb, "deactivate") == 0) g_mirror_ctl_deactivate(req, mp); else if (strcmp(verb, "forget") == 0) g_mirror_ctl_forget(req, mp); else if (strcmp(verb, "stop") == 0) g_mirror_ctl_stop(req, mp, 0); else if (strcmp(verb, "destroy") == 0) g_mirror_ctl_stop(req, mp, 1); else gctl_error(req, "Unknown verb."); g_topology_lock(); } Index: head/sys/geom/mountver/g_mountver.c =================================================================== --- head/sys/geom/mountver/g_mountver.c (revision 350693) +++ head/sys/geom/mountver/g_mountver.c (revision 350694) @@ -1,663 +1,664 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Edward Tomasz Napierala * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, mountver, CTLFLAG_RW, 0, "GEOM_MOUNTVER stuff"); static u_int g_mountver_debug = 0; static u_int g_mountver_check_ident = 1; SYSCTL_UINT(_kern_geom_mountver, OID_AUTO, debug, CTLFLAG_RW, &g_mountver_debug, 0, "Debug level"); SYSCTL_UINT(_kern_geom_mountver, OID_AUTO, check_ident, CTLFLAG_RW, &g_mountver_check_ident, 0, "Check disk ident when reattaching"); static eventhandler_tag g_mountver_pre_sync = NULL; static void g_mountver_queue(struct bio *bp); static void g_mountver_orphan(struct g_consumer *cp); static void g_mountver_resize(struct g_consumer *cp); static int g_mountver_destroy(struct g_geom *gp, boolean_t force); static g_taste_t g_mountver_taste; static int g_mountver_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_mountver_config(struct gctl_req *req, struct g_class *mp, const char *verb); static void g_mountver_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static void g_mountver_init(struct g_class *mp); static void g_mountver_fini(struct g_class *mp); struct g_class g_mountver_class = { .name = G_MOUNTVER_CLASS_NAME, .version = G_VERSION, .ctlreq = g_mountver_config, .taste = g_mountver_taste, .destroy_geom = g_mountver_destroy_geom, .init = g_mountver_init, .fini = g_mountver_fini }; static void g_mountver_done(struct bio *bp) { struct g_geom *gp; struct bio *pbp; if (bp->bio_error != ENXIO) { g_std_done(bp); return; } /* * When the device goes away, it's possible that few requests * will be completed with ENXIO before g_mountver_orphan() * gets called. To work around that, we have to queue requests * that failed with ENXIO, in order to send them later. 
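 *
 * The overall sequence is roughly: the device disappears, in-flight BIOs
 * complete with ENXIO and are re-queued here, g_mountver_orphan() drops
 * access and detaches the consumer, and once a matching provider shows
 * up again the queued requests are resubmitted via
 * g_mountver_send_queued().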
*/ gp = bp->bio_from->geom; pbp = bp->bio_parent; KASSERT(pbp->bio_to == LIST_FIRST(&gp->provider), ("parent request was for someone else")); g_destroy_bio(bp); pbp->bio_inbed++; g_mountver_queue(pbp); } static void g_mountver_send(struct bio *bp) { struct g_geom *gp; struct bio *cbp; gp = bp->bio_to->geom; cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_mountver_done; g_io_request(cbp, LIST_FIRST(&gp->consumer)); } static void g_mountver_queue(struct bio *bp) { struct g_mountver_softc *sc; struct g_geom *gp; gp = bp->bio_to->geom; sc = gp->softc; mtx_lock(&sc->sc_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_mtx); } static void g_mountver_send_queued(struct g_geom *gp) { struct g_mountver_softc *sc; struct bio *bp; sc = gp->softc; mtx_lock(&sc->sc_mtx); while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) { TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); G_MOUNTVER_LOGREQ(bp, "Sending queued request."); g_mountver_send(bp); } mtx_unlock(&sc->sc_mtx); } static void g_mountver_discard_queued(struct g_geom *gp) { struct g_mountver_softc *sc; struct bio *bp; sc = gp->softc; mtx_lock(&sc->sc_mtx); while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) { TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); G_MOUNTVER_LOGREQ(bp, "Discarding queued request."); g_io_deliver(bp, ENXIO); } mtx_unlock(&sc->sc_mtx); } static void g_mountver_start(struct bio *bp) { struct g_mountver_softc *sc; struct g_geom *gp; gp = bp->bio_to->geom; sc = gp->softc; G_MOUNTVER_LOGREQ(bp, "Request received."); /* * It is possible that some bios were returned with ENXIO, even though * orphaning didn't happen yet. In that case, queue all subsequent * requests in order to maintain ordering. */ if (sc->sc_orphaned || !TAILQ_EMPTY(&sc->sc_queue)) { if (sc->sc_shutting_down) { G_MOUNTVER_LOGREQ(bp, "Discarding request due to shutdown."); g_io_deliver(bp, ENXIO); return; } G_MOUNTVER_LOGREQ(bp, "Queueing request."); g_mountver_queue(bp); if (!sc->sc_orphaned) g_mountver_send_queued(gp); } else { G_MOUNTVER_LOGREQ(bp, "Sending request."); g_mountver_send(bp); } } static int g_mountver_access(struct g_provider *pp, int dr, int dw, int de) { struct g_mountver_softc *sc; struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); gp = pp->geom; cp = LIST_FIRST(&gp->consumer); sc = gp->softc; if (sc == NULL && dr <= 0 && dw <= 0 && de <= 0) return (0); KASSERT(sc != NULL, ("Trying to access withered provider \"%s\".", pp->name)); sc->sc_access_r += dr; sc->sc_access_w += dw; sc->sc_access_e += de; if (sc->sc_orphaned) return (0); return (g_access(cp, dr, dw, de)); } static int g_mountver_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp) { struct g_mountver_softc *sc; struct g_geom *gp; struct g_provider *newpp; struct g_consumer *cp; char name[64]; int error; int identsize = DISK_IDENT_SIZE; g_topology_assert(); gp = NULL; newpp = NULL; cp = NULL; snprintf(name, sizeof(name), "%s%s", pp->name, G_MOUNTVER_SUFFIX); LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) { gctl_error(req, "Provider %s already exists.", name); return (EEXIST); } } gp = g_new_geomf(mp, "%s", name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "gmountver", NULL, MTX_DEF | MTX_RECURSE); TAILQ_INIT(&sc->sc_queue); sc->sc_provider_name = strdup(pp->name, M_GEOM); gp->softc = sc; gp->start = g_mountver_start; gp->orphan = g_mountver_orphan; gp->resize = g_mountver_resize; gp->access = g_mountver_access; gp->dumpconf = g_mountver_dumpconf; 
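/*
 * Create the pass-through provider.  It takes over the media and sector
 * size of the underlying provider and, below, advertises unmapped I/O
 * only when the underlying provider accepts it.
 */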
newpp = g_new_providerf(gp, "%s", gp->name); newpp->mediasize = pp->mediasize; newpp->sectorsize = pp->sectorsize; newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0) { G_MOUNTVER_DEBUG(0, "Unmapped supported for %s.", gp->name); newpp->flags |= G_PF_ACCEPT_UNMAPPED; } else { G_MOUNTVER_DEBUG(0, "Unmapped unsupported for %s.", gp->name); newpp->flags &= ~G_PF_ACCEPT_UNMAPPED; } cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); goto fail; } error = g_access(cp, 1, 0, 0); if (error != 0) { gctl_error(req, "Cannot access provider %s.", pp->name); goto fail; } error = g_io_getattr("GEOM::ident", cp, &identsize, sc->sc_ident); g_access(cp, -1, 0, 0); if (error != 0) { if (g_mountver_check_ident) { gctl_error(req, "Cannot get disk ident from %s; error = %d.", pp->name, error); goto fail; } G_MOUNTVER_DEBUG(0, "Cannot get disk ident from %s; error = %d.", pp->name, error); sc->sc_ident[0] = '\0'; } g_error_provider(newpp, 0); G_MOUNTVER_DEBUG(0, "Device %s created.", gp->name); return (0); fail: g_free(sc->sc_provider_name); if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); g_destroy_provider(newpp); g_free(gp->softc); g_destroy_geom(gp); return (error); } static int g_mountver_destroy(struct g_geom *gp, boolean_t force) { struct g_mountver_softc *sc; struct g_provider *pp; g_topology_assert(); if (gp->softc == NULL) return (ENXIO); sc = gp->softc; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_MOUNTVER_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_MOUNTVER_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else { G_MOUNTVER_DEBUG(0, "Device %s removed.", gp->name); } if (pp != NULL) g_wither_provider(pp, ENXIO); g_mountver_discard_queued(gp); g_free(sc->sc_provider_name); g_free(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static int g_mountver_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_mountver_destroy(gp, 0)); } static void g_mountver_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_MOUNTVER_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } if (g_mountver_create(req, mp, pp) != 0) return; } } static struct g_geom * g_mountver_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp); } return (NULL); } static void g_mountver_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", 
sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); gp = g_mountver_find_geom(mp, name); if (gp == NULL) { G_MOUNTVER_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } error = g_mountver_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", gp->name, error); return; } } } static void g_mountver_orphan(struct g_consumer *cp) { struct g_mountver_softc *sc; g_topology_assert(); sc = cp->geom->softc; sc->sc_orphaned = 1; if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); G_MOUNTVER_DEBUG(0, "%s is offline. Mount verification in progress.", sc->sc_provider_name); } static void g_mountver_resize(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; gp = cp->geom; LIST_FOREACH(pp, &gp->provider, provider) g_resize_provider(pp, cp->provider->mediasize); } static int g_mountver_ident_matches(struct g_geom *gp) { struct g_consumer *cp; struct g_mountver_softc *sc; char ident[DISK_IDENT_SIZE]; int error, identsize = DISK_IDENT_SIZE; sc = gp->softc; cp = LIST_FIRST(&gp->consumer); if (g_mountver_check_ident == 0) return (0); error = g_access(cp, 1, 0, 0); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot access %s; " "not attaching; error = %d.", gp->name, error); return (1); } error = g_io_getattr("GEOM::ident", cp, &identsize, ident); g_access(cp, -1, 0, 0); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot get disk ident for %s; " "not attaching; error = %d.", gp->name, error); return (1); } if (strcmp(ident, sc->sc_ident) != 0) { G_MOUNTVER_DEBUG(1, "Disk ident for %s (\"%s\") is different " "from expected \"%s\", not attaching.", gp->name, ident, sc->sc_ident); return (1); } return (0); } static struct g_geom * g_mountver_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_mountver_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_MOUNTVER_DEBUG(2, "Tasting %s.", pp->name); /* * Let's check if device already exists. */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; /* Already attached? 
*/ if (pp == LIST_FIRST(&gp->provider)) return (NULL); if (sc->sc_orphaned && strcmp(pp->name, sc->sc_provider_name) == 0) break; } if (gp == NULL) return (NULL); cp = LIST_FIRST(&gp->consumer); g_attach(cp, pp); error = g_mountver_ident_matches(gp); if (error != 0) { g_detach(cp); return (NULL); } if (sc->sc_access_r > 0 || sc->sc_access_w > 0 || sc->sc_access_e > 0) { error = g_access(cp, sc->sc_access_r, sc->sc_access_w, sc->sc_access_e); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot access %s; error = %d.", pp->name, error); g_detach(cp); return (NULL); } } g_mountver_send_queued(gp); sc->sc_orphaned = 0; G_MOUNTVER_DEBUG(0, "%s has completed mount verification.", sc->sc_provider_name); return (gp); } static void g_mountver_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_MOUNTVER_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_mountver_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0) { g_mountver_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_mountver_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_mountver_softc *sc; if (pp != NULL || cp != NULL) return; sc = gp->softc; sbuf_printf(sb, "%s%s\n", indent, sc->sc_orphaned ? "OFFLINE" : "ONLINE"); sbuf_printf(sb, "%s%s\n", indent, sc->sc_provider_name); sbuf_printf(sb, "%s%s\n", indent, sc->sc_ident); } static void g_mountver_shutdown_pre_sync(void *arg, int howto) { struct g_mountver_softc *sc; struct g_class *mp; struct g_geom *gp, *gp2; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if (gp->softc == NULL) continue; sc = gp->softc; sc->sc_shutting_down = 1; if (sc->sc_orphaned) g_mountver_destroy(gp, 1); } g_topology_unlock(); } static void g_mountver_init(struct g_class *mp) { g_mountver_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_mountver_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_mountver_pre_sync == NULL) G_MOUNTVER_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_mountver_fini(struct g_class *mp) { if (g_mountver_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_mountver_pre_sync); } DECLARE_GEOM_CLASS(g_mountver_class, g_mountver); MODULE_VERSION(geom_mountver, 0); Index: head/sys/geom/mountver/g_mountver.h =================================================================== --- head/sys/geom/mountver/g_mountver.h (revision 350693) +++ head/sys/geom/mountver/g_mountver.h (revision 350694) @@ -1,74 +1,59 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Edward Tomasz Napierala * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_MOUNTVER_H_ #define _G_MOUNTVER_H_ #define G_MOUNTVER_CLASS_NAME "MOUNTVER" #define G_MOUNTVER_VERSION 4 #define G_MOUNTVER_SUFFIX ".mountver" #ifdef _KERNEL -#define G_MOUNTVER_DEBUG(lvl, ...) do { \ - if (g_mountver_debug >= (lvl)) { \ - printf("GEOM_MOUNTVER"); \ - if (g_mountver_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_MOUNTVER_LOGREQ(bp, ...) do { \ - if (g_mountver_debug >= 2) { \ - printf("GEOM_MOUNTVER[2]: "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_MOUNTVER_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_MOUNTVER", g_mountver_debug, (lvl), NULL, __VA_ARGS__) +#define G_MOUNTVER_LOGREQ(bp, ...) \ + _GEOM_DEBUG("GEOM_MOUNTVER", g_mountver_debug, 2, (bp), __VA_ARGS__) struct g_mountver_softc { TAILQ_HEAD(, bio) sc_queue; struct mtx sc_mtx; char *sc_provider_name; char sc_ident[DISK_IDENT_SIZE]; int sc_orphaned; int sc_shutting_down; int sc_access_r; int sc_access_w; int sc_access_e; }; #endif /* _KERNEL */ #endif /* _G_MOUNTVER_H_ */ Index: head/sys/geom/nop/g_nop.c =================================================================== --- head/sys/geom/nop/g_nop.c (revision 350693) +++ head/sys/geom/nop/g_nop.c (revision 350694) @@ -1,922 +1,923 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include +#include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, nop, CTLFLAG_RW, 0, "GEOM_NOP stuff"); static u_int g_nop_debug = 0; SYSCTL_UINT(_kern_geom_nop, OID_AUTO, debug, CTLFLAG_RW, &g_nop_debug, 0, "Debug level"); static int g_nop_destroy(struct g_geom *gp, boolean_t force); static int g_nop_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_nop_config(struct gctl_req *req, struct g_class *mp, const char *verb); static g_access_t g_nop_access; static g_dumpconf_t g_nop_dumpconf; static g_orphan_t g_nop_orphan; static g_provgone_t g_nop_providergone; static g_resize_t g_nop_resize; static g_start_t g_nop_start; struct g_class g_nop_class = { .name = G_NOP_CLASS_NAME, .version = G_VERSION, .ctlreq = g_nop_config, .destroy_geom = g_nop_destroy_geom, .access = g_nop_access, .dumpconf = g_nop_dumpconf, .orphan = g_nop_orphan, .providergone = g_nop_providergone, .resize = g_nop_resize, .start = g_nop_start, }; struct g_nop_delay { struct callout dl_cal; struct bio *dl_bio; TAILQ_ENTRY(g_nop_delay) dl_next; }; static void g_nop_orphan(struct g_consumer *cp) { g_topology_assert(); g_nop_destroy(cp->geom, 1); } static void g_nop_resize(struct g_consumer *cp) { struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *pp; off_t size; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc->sc_explicitsize != 0) return; if (cp->provider->mediasize < sc->sc_offset) { g_nop_destroy(gp, 1); return; } size = cp->provider->mediasize - sc->sc_offset; LIST_FOREACH(pp, &gp->provider, provider) g_resize_provider(pp, size); } static int g_nop_dumper(void *priv, void *virtual, vm_offset_t physical, off_t offset, size_t length) { return (0); } static void g_nop_kerneldump(struct bio *bp, struct g_nop_softc *sc) { struct g_kerneldump *gkd; struct g_geom *gp; struct g_provider *pp; gkd = (struct g_kerneldump *)bp->bio_data; gp = bp->bio_to->geom; g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length); pp = LIST_FIRST(&gp->provider); gkd->di.dumper = g_nop_dumper; gkd->di.priv = sc; gkd->di.blocksize = pp->sectorsize; gkd->di.maxiosize = DFLTPHYS; gkd->di.mediaoffset = sc->sc_offset + gkd->offset; if (gkd->offset > sc->sc_explicitsize) { g_io_deliver(bp, ENODEV); return; } if (gkd->offset + gkd->length > sc->sc_explicitsize) gkd->length = sc->sc_explicitsize - gkd->offset; gkd->di.mediasize = gkd->length; g_io_deliver(bp, 0); } static void g_nop_pass(struct bio *cbp, struct g_geom *gp) { G_NOP_LOGREQ(cbp, "Sending request."); g_io_request(cbp, LIST_FIRST(&gp->consumer)); } static void g_nop_pass_timeout(void *data) { struct g_nop_softc *sc; struct g_geom *gp; struct g_nop_delay *gndelay; gndelay = (struct g_nop_delay *)data; gp = gndelay->dl_bio->bio_to->geom; sc = gp->softc; mtx_lock(&sc->sc_lock); TAILQ_REMOVE(&sc->sc_head_delay, gndelay, dl_next); mtx_unlock(&sc->sc_lock); g_nop_pass(gndelay->dl_bio, gp); g_free(data); } static void g_nop_start(struct bio *bp) { struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *pp; struct bio *cbp; u_int failprob, delayprob, delaytime; failprob = delayprob = 0; gp = bp->bio_to->geom; sc = gp->softc; G_NOP_LOGREQ(bp, "Request received."); mtx_lock(&sc->sc_lock); switch (bp->bio_cmd) { case BIO_READ: sc->sc_reads++; sc->sc_readbytes += bp->bio_length; failprob = sc->sc_rfailprob; delayprob = sc->sc_rdelayprob; 
delaytime = sc->sc_delaymsec; break; case BIO_WRITE: sc->sc_writes++; sc->sc_wrotebytes += bp->bio_length; failprob = sc->sc_wfailprob; delayprob = sc->sc_wdelayprob; delaytime = sc->sc_delaymsec; break; case BIO_DELETE: sc->sc_deletes++; break; case BIO_GETATTR: sc->sc_getattrs++; if (sc->sc_physpath && g_handleattr_str(bp, "GEOM::physpath", sc->sc_physpath)) ; else if (strcmp(bp->bio_attribute, "GEOM::kerneldump") == 0) g_nop_kerneldump(bp, sc); else /* * Fallthrough to forwarding the GETATTR down to the * lower level device. */ break; mtx_unlock(&sc->sc_lock); return; case BIO_FLUSH: sc->sc_flushes++; break; case BIO_CMD0: sc->sc_cmd0s++; break; case BIO_CMD1: sc->sc_cmd1s++; break; case BIO_CMD2: sc->sc_cmd2s++; break; } mtx_unlock(&sc->sc_lock); if (failprob > 0) { u_int rval; rval = arc4random() % 100; if (rval < failprob) { G_NOP_LOGREQLVL(1, bp, "Returning error=%d.", sc->sc_error); g_io_deliver(bp, sc->sc_error); return; } } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; cbp->bio_offset = bp->bio_offset + sc->sc_offset; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL pp")); cbp->bio_to = pp; if (delayprob > 0) { struct g_nop_delay *gndelay; u_int rval; rval = arc4random() % 100; if (rval < delayprob) { gndelay = g_malloc(sizeof(*gndelay), M_NOWAIT | M_ZERO); if (gndelay != NULL) { callout_init(&gndelay->dl_cal, 1); gndelay->dl_bio = cbp; mtx_lock(&sc->sc_lock); TAILQ_INSERT_TAIL(&sc->sc_head_delay, gndelay, dl_next); mtx_unlock(&sc->sc_lock); callout_reset(&gndelay->dl_cal, MSEC_2_TICKS(delaytime), g_nop_pass_timeout, gndelay); return; } } } g_nop_pass(cbp, gp); } static int g_nop_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; int error; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); error = g_access(cp, dr, dw, de); return (error); } static int g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, int ioerror, u_int rfailprob, u_int wfailprob, u_int delaymsec, u_int rdelayprob, u_int wdelayprob, off_t offset, off_t size, u_int secsize, off_t stripesize, off_t stripeoffset, const char *physpath) { struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *newpp; struct g_consumer *cp; char name[64]; int error; off_t explicitsize; g_topology_assert(); gp = NULL; newpp = NULL; cp = NULL; if ((offset % pp->sectorsize) != 0) { gctl_error(req, "Invalid offset for provider %s.", pp->name); return (EINVAL); } if ((size % pp->sectorsize) != 0) { gctl_error(req, "Invalid size for provider %s.", pp->name); return (EINVAL); } if (offset >= pp->mediasize) { gctl_error(req, "Invalid offset for provider %s.", pp->name); return (EINVAL); } explicitsize = size; if (size == 0) size = pp->mediasize - offset; if (offset + size > pp->mediasize) { gctl_error(req, "Invalid size for provider %s.", pp->name); return (EINVAL); } if (secsize == 0) secsize = pp->sectorsize; else if ((secsize % pp->sectorsize) != 0) { gctl_error(req, "Invalid secsize for provider %s.", pp->name); return (EINVAL); } if (secsize > MAXPHYS) { gctl_error(req, "secsize is too big."); return (EINVAL); } size -= size % secsize; if ((stripesize % pp->sectorsize) != 0) { gctl_error(req, "Invalid stripesize for provider %s.", pp->name); return (EINVAL); } if ((stripeoffset % pp->sectorsize) != 0) { gctl_error(req, "Invalid stripeoffset for provider %s.", pp->name); return (EINVAL); } if (stripesize != 0 && stripeoffset >= stripesize) { gctl_error(req, "stripeoffset is too big."); return 
(EINVAL); } snprintf(name, sizeof(name), "%s%s", pp->name, G_NOP_SUFFIX); LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) { gctl_error(req, "Provider %s already exists.", name); return (EEXIST); } } gp = g_new_geomf(mp, "%s", name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); sc->sc_offset = offset; sc->sc_explicitsize = explicitsize; sc->sc_stripesize = stripesize; sc->sc_stripeoffset = stripeoffset; if (physpath && strcmp(physpath, G_NOP_PHYSPATH_PASSTHROUGH)) { sc->sc_physpath = strndup(physpath, MAXPATHLEN, M_GEOM); } else sc->sc_physpath = NULL; sc->sc_error = ioerror; sc->sc_rfailprob = rfailprob; sc->sc_wfailprob = wfailprob; sc->sc_delaymsec = delaymsec; sc->sc_rdelayprob = rdelayprob; sc->sc_wdelayprob = wdelayprob; sc->sc_reads = 0; sc->sc_writes = 0; sc->sc_deletes = 0; sc->sc_getattrs = 0; sc->sc_flushes = 0; sc->sc_cmd0s = 0; sc->sc_cmd1s = 0; sc->sc_cmd2s = 0; sc->sc_readbytes = 0; sc->sc_wrotebytes = 0; TAILQ_INIT(&sc->sc_head_delay); mtx_init(&sc->sc_lock, "gnop lock", NULL, MTX_DEF); gp->softc = sc; newpp = g_new_providerf(gp, "%s", gp->name); newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; newpp->mediasize = size; newpp->sectorsize = secsize; newpp->stripesize = stripesize; newpp->stripeoffset = stripeoffset; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); goto fail; } newpp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED; g_error_provider(newpp, 0); G_NOP_DEBUG(0, "Device %s created.", gp->name); return (0); fail: if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); g_destroy_provider(newpp); mtx_destroy(&sc->sc_lock); free(sc->sc_physpath, M_GEOM); g_free(gp->softc); g_destroy_geom(gp); return (error); } static void g_nop_providergone(struct g_provider *pp) { struct g_geom *gp = pp->geom; struct g_nop_softc *sc = gp->softc; KASSERT(TAILQ_EMPTY(&sc->sc_head_delay), ("delayed request list is not empty")); gp->softc = NULL; free(sc->sc_physpath, M_GEOM); mtx_destroy(&sc->sc_lock); g_free(sc); } static int g_nop_destroy(struct g_geom *gp, boolean_t force) { struct g_nop_softc *sc; struct g_provider *pp; g_topology_assert(); sc = gp->softc; if (sc == NULL) return (ENXIO); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_NOP_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_NOP_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else { G_NOP_DEBUG(0, "Device %s removed.", gp->name); } g_wither_geom(gp, ENXIO); return (0); } static int g_nop_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_nop_destroy(gp, 0)); } static void g_nop_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; intmax_t *error, *rfailprob, *wfailprob, *offset, *secsize, *size, *stripesize, *stripeoffset, *delaymsec, *rdelayprob, *wdelayprob; const char *name, *physpath; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } error = gctl_get_paraml(req, "error", sizeof(*error)); if (error == NULL) { gctl_error(req, "No '%s' argument", "error"); return; } rfailprob = gctl_get_paraml(req, "rfailprob", sizeof(*rfailprob)); 
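/*
 * g_nop_start() above turns rfailprob/wfailprob/rdelayprob/wdelayprob into
 * behaviour by drawing arc4random() % 100 and comparing it against the
 * configured percentage.  The userland sketch below shows the same dice
 * roll, assuming arc4random(3) from <stdlib.h> as on FreeBSD; the function
 * and parameter names are illustrative only, and usleep() stands in for the
 * kernel callout used for delayed requests.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Fail "failprob" percent of calls with "error"; delay "delayprob" percent. */
static int
maybe_fail(unsigned int failprob, unsigned int delayprob,
    unsigned int delaymsec, int error)
{
	if (failprob > 0 && arc4random() % 100 < failprob)
		return (error);
	if (delayprob > 0 && arc4random() % 100 < delayprob)
		usleep(delaymsec * 1000);
	return (0);
}

int
main(void)
{
	int i, failures = 0;

	for (i = 0; i < 1000; i++) {
		/* 10% chance of EIO, 5% chance of a 1 ms delay. */
		if (maybe_fail(10, 5, 1, EIO) != 0)
			failures++;
	}
	printf("%d of 1000 simulated I/Os failed\n", failures);
	return (0);
}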
if (rfailprob == NULL) { gctl_error(req, "No '%s' argument", "rfailprob"); return; } if (*rfailprob < -1 || *rfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "rfailprob"); return; } wfailprob = gctl_get_paraml(req, "wfailprob", sizeof(*wfailprob)); if (wfailprob == NULL) { gctl_error(req, "No '%s' argument", "wfailprob"); return; } if (*wfailprob < -1 || *wfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "wfailprob"); return; } delaymsec = gctl_get_paraml(req, "delaymsec", sizeof(*delaymsec)); if (delaymsec == NULL) { gctl_error(req, "No '%s' argument", "delaymsec"); return; } if (*delaymsec < 1 && *delaymsec != -1) { gctl_error(req, "Invalid '%s' argument", "delaymsec"); return; } rdelayprob = gctl_get_paraml(req, "rdelayprob", sizeof(*rdelayprob)); if (rdelayprob == NULL) { gctl_error(req, "No '%s' argument", "rdelayprob"); return; } if (*rdelayprob < -1 || *rdelayprob > 100) { gctl_error(req, "Invalid '%s' argument", "rdelayprob"); return; } wdelayprob = gctl_get_paraml(req, "wdelayprob", sizeof(*wdelayprob)); if (wdelayprob == NULL) { gctl_error(req, "No '%s' argument", "wdelayprob"); return; } if (*wdelayprob < -1 || *wdelayprob > 100) { gctl_error(req, "Invalid '%s' argument", "wdelayprob"); return; } offset = gctl_get_paraml(req, "offset", sizeof(*offset)); if (offset == NULL) { gctl_error(req, "No '%s' argument", "offset"); return; } if (*offset < 0) { gctl_error(req, "Invalid '%s' argument", "offset"); return; } size = gctl_get_paraml(req, "size", sizeof(*size)); if (size == NULL) { gctl_error(req, "No '%s' argument", "size"); return; } if (*size < 0) { gctl_error(req, "Invalid '%s' argument", "size"); return; } secsize = gctl_get_paraml(req, "secsize", sizeof(*secsize)); if (secsize == NULL) { gctl_error(req, "No '%s' argument", "secsize"); return; } if (*secsize < 0) { gctl_error(req, "Invalid '%s' argument", "secsize"); return; } stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "No '%s' argument", "stripesize"); return; } if (*stripesize < 0) { gctl_error(req, "Invalid '%s' argument", "stripesize"); return; } stripeoffset = gctl_get_paraml(req, "stripeoffset", sizeof(*stripeoffset)); if (stripeoffset == NULL) { gctl_error(req, "No '%s' argument", "stripeoffset"); return; } if (*stripeoffset < 0) { gctl_error(req, "Invalid '%s' argument", "stripeoffset"); return; } physpath = gctl_get_asciiparam(req, "physpath"); for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } if (g_nop_create(req, mp, pp, *error == -1 ? EIO : (int)*error, *rfailprob == -1 ? 0 : (u_int)*rfailprob, *wfailprob == -1 ? 0 : (u_int)*wfailprob, *delaymsec == -1 ? 1 : (u_int)*delaymsec, *rdelayprob == -1 ? 0 : (u_int)*rdelayprob, *wdelayprob == -1 ? 
0 : (u_int)*wdelayprob, (off_t)*offset, (off_t)*size, (u_int)*secsize, (off_t)*stripesize, (off_t)*stripeoffset, physpath) != 0) { return; } } } static void g_nop_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_nop_softc *sc; struct g_provider *pp; intmax_t *delaymsec, *error, *rdelayprob, *rfailprob, *wdelayprob, *wfailprob; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } error = gctl_get_paraml(req, "error", sizeof(*error)); if (error == NULL) { gctl_error(req, "No '%s' argument", "error"); return; } rfailprob = gctl_get_paraml(req, "rfailprob", sizeof(*rfailprob)); if (rfailprob == NULL) { gctl_error(req, "No '%s' argument", "rfailprob"); return; } if (*rfailprob < -1 || *rfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "rfailprob"); return; } wfailprob = gctl_get_paraml(req, "wfailprob", sizeof(*wfailprob)); if (wfailprob == NULL) { gctl_error(req, "No '%s' argument", "wfailprob"); return; } if (*wfailprob < -1 || *wfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "wfailprob"); return; } delaymsec = gctl_get_paraml(req, "delaymsec", sizeof(*delaymsec)); if (delaymsec == NULL) { gctl_error(req, "No '%s' argument", "delaymsec"); return; } if (*delaymsec < 1 && *delaymsec != -1) { gctl_error(req, "Invalid '%s' argument", "delaymsec"); return; } rdelayprob = gctl_get_paraml(req, "rdelayprob", sizeof(*rdelayprob)); if (rdelayprob == NULL) { gctl_error(req, "No '%s' argument", "rdelayprob"); return; } if (*rdelayprob < -1 || *rdelayprob > 100) { gctl_error(req, "Invalid '%s' argument", "rdelayprob"); return; } wdelayprob = gctl_get_paraml(req, "wdelayprob", sizeof(*wdelayprob)); if (wdelayprob == NULL) { gctl_error(req, "No '%s' argument", "wdelayprob"); return; } if (*wdelayprob < -1 || *wdelayprob > 100) { gctl_error(req, "Invalid '%s' argument", "wdelayprob"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL || pp->geom->class != mp) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } sc = pp->geom->softc; if (*error != -1) sc->sc_error = (int)*error; if (*rfailprob != -1) sc->sc_rfailprob = (u_int)*rfailprob; if (*wfailprob != -1) sc->sc_wfailprob = (u_int)*wfailprob; if (*rdelayprob != -1) sc->sc_rdelayprob = (u_int)*rdelayprob; if (*wdelayprob != -1) sc->sc_wdelayprob = (u_int)*wdelayprob; if (*delaymsec != -1) sc->sc_delaymsec = (u_int)*delaymsec; } } static struct g_geom * g_nop_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp); } return (NULL); } static void g_nop_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { 
gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); gp = g_nop_find_geom(mp, name); if (gp == NULL) { G_NOP_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } error = g_nop_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", gp->name, error); return; } } } static void g_nop_ctl_reset(struct gctl_req *req, struct g_class *mp) { struct g_nop_softc *sc; struct g_provider *pp; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL || pp->geom->class != mp) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } sc = pp->geom->softc; sc->sc_reads = 0; sc->sc_writes = 0; sc->sc_deletes = 0; sc->sc_getattrs = 0; sc->sc_flushes = 0; sc->sc_cmd0s = 0; sc->sc_cmd1s = 0; sc->sc_cmd2s = 0; sc->sc_readbytes = 0; sc->sc_wrotebytes = 0; } } static void g_nop_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_NOP_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_nop_ctl_create(req, mp); return; } else if (strcmp(verb, "configure") == 0) { g_nop_ctl_configure(req, mp); return; } else if (strcmp(verb, "destroy") == 0) { g_nop_ctl_destroy(req, mp); return; } else if (strcmp(verb, "reset") == 0) { g_nop_ctl_reset(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_nop_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_nop_softc *sc; if (pp != NULL || cp != NULL) return; sc = gp->softc; sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)sc->sc_offset); sbuf_printf(sb, "%s%u\n", indent, sc->sc_rfailprob); sbuf_printf(sb, "%s%u\n", indent, sc->sc_wfailprob); sbuf_printf(sb, "%s%u\n", indent, sc->sc_rdelayprob); sbuf_printf(sb, "%s%u\n", indent, sc->sc_wdelayprob); sbuf_printf(sb, "%s%d\n", indent, sc->sc_delaymsec); sbuf_printf(sb, "%s%d\n", indent, sc->sc_error); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_reads); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_writes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_deletes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_getattrs); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_flushes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd0s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd1s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd2s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_readbytes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_wrotebytes); } DECLARE_GEOM_CLASS(g_nop_class, g_nop); 
MODULE_VERSION(geom_nop, 0); Index: head/sys/geom/nop/g_nop.h =================================================================== --- head/sys/geom/nop/g_nop.h (revision 350693) +++ head/sys/geom/nop/g_nop.h (revision 350694) @@ -1,96 +1,81 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_NOP_H_ #define _G_NOP_H_ #define G_NOP_CLASS_NAME "NOP" #define G_NOP_VERSION 4 #define G_NOP_SUFFIX ".nop" /* * Special flag to instruct gnop to passthrough the underlying provider's * physical path */ #define G_NOP_PHYSPATH_PASSTHROUGH "\255" #ifdef _KERNEL -#define G_NOP_DEBUG(lvl, ...) do { \ - if (g_nop_debug >= (lvl)) { \ - printf("GEOM_NOP"); \ - if (g_nop_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) +#define G_NOP_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_NOP", g_nop_debug, (lvl), NULL, __VA_ARGS__) +#define G_NOP_LOGREQLVL(lvl, bp, ...) \ + _GEOM_DEBUG("GEOM_NOP", g_nop_debug, (lvl), (bp), __VA_ARGS__) #define G_NOP_LOGREQ(bp, ...) G_NOP_LOGREQLVL(2, bp, __VA_ARGS__) -#define G_NOP_LOGREQLVL(lvl, bp, ...) 
do { \ - if (g_nop_debug >= (lvl)) { \ - printf("GEOM_NOP[%d]: ", (lvl)); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) struct g_nop_delay; TAILQ_HEAD(g_nop_delay_head, g_nop_delay); struct g_nop_softc { int sc_error; off_t sc_offset; off_t sc_explicitsize; off_t sc_stripesize; off_t sc_stripeoffset; u_int sc_rfailprob; u_int sc_wfailprob; u_int sc_delaymsec; u_int sc_rdelayprob; u_int sc_wdelayprob; uintmax_t sc_reads; uintmax_t sc_writes; uintmax_t sc_deletes; uintmax_t sc_getattrs; uintmax_t sc_flushes; uintmax_t sc_cmd0s; uintmax_t sc_cmd1s; uintmax_t sc_cmd2s; uintmax_t sc_readbytes; uintmax_t sc_wrotebytes; char *sc_physpath; struct mtx sc_lock; struct g_nop_delay_head sc_head_delay; }; #endif /* _KERNEL */ #endif /* _G_NOP_H_ */ Index: head/sys/geom/raid/g_raid.c =================================================================== --- head/sys/geom/raid/g_raid.c (revision 350693) +++ head/sys/geom/raid/g_raid.c (revision 350694) @@ -1,2571 +1,2572 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include "g_raid_md_if.h" #include "g_raid_tr_if.h" static MALLOC_DEFINE(M_RAID, "raid_data", "GEOM_RAID Data"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, raid, CTLFLAG_RW, 0, "GEOM_RAID stuff"); int g_raid_enable = 1; SYSCTL_INT(_kern_geom_raid, OID_AUTO, enable, CTLFLAG_RWTUN, &g_raid_enable, 0, "Enable on-disk metadata taste"); u_int g_raid_aggressive_spare = 0; SYSCTL_UINT(_kern_geom_raid, OID_AUTO, aggressive_spare, CTLFLAG_RWTUN, &g_raid_aggressive_spare, 0, "Use disks without metadata as spare"); u_int g_raid_debug = 0; SYSCTL_UINT(_kern_geom_raid, OID_AUTO, debug, CTLFLAG_RWTUN, &g_raid_debug, 0, "Debug level"); int g_raid_read_err_thresh = 10; SYSCTL_UINT(_kern_geom_raid, OID_AUTO, read_err_thresh, CTLFLAG_RWTUN, &g_raid_read_err_thresh, 0, "Number of read errors equated to disk failure"); u_int g_raid_start_timeout = 30; SYSCTL_UINT(_kern_geom_raid, OID_AUTO, start_timeout, CTLFLAG_RWTUN, &g_raid_start_timeout, 0, "Time to wait for all array components"); static u_int g_raid_clean_time = 5; SYSCTL_UINT(_kern_geom_raid, OID_AUTO, clean_time, CTLFLAG_RWTUN, &g_raid_clean_time, 0, "Mark volume as clean when idling"); static u_int g_raid_disconnect_on_failure = 1; SYSCTL_UINT(_kern_geom_raid, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, &g_raid_disconnect_on_failure, 0, "Disconnect component on I/O failure."); static u_int g_raid_name_format = 0; SYSCTL_UINT(_kern_geom_raid, OID_AUTO, name_format, CTLFLAG_RWTUN, &g_raid_name_format, 0, "Providers name format."); static u_int g_raid_idle_threshold = 1000000; SYSCTL_UINT(_kern_geom_raid, OID_AUTO, idle_threshold, CTLFLAG_RWTUN, &g_raid_idle_threshold, 1000000, "Time in microseconds to consider a volume idle."); #define MSLEEP(rv, ident, mtx, priority, wmesg, timeout) do { \ G_RAID_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ rv = msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ G_RAID_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ } while (0) LIST_HEAD(, g_raid_md_class) g_raid_md_classes = LIST_HEAD_INITIALIZER(g_raid_md_classes); LIST_HEAD(, g_raid_tr_class) g_raid_tr_classes = LIST_HEAD_INITIALIZER(g_raid_tr_classes); LIST_HEAD(, g_raid_volume) g_raid_volumes = LIST_HEAD_INITIALIZER(g_raid_volumes); static eventhandler_tag g_raid_post_sync = NULL; static int g_raid_started = 0; static int g_raid_shutdown = 0; static int g_raid_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_raid_taste; static void g_raid_init(struct g_class *mp); static void g_raid_fini(struct g_class *mp); struct g_class g_raid_class = { .name = G_RAID_CLASS_NAME, .version = G_VERSION, .ctlreq = g_raid_ctl, .taste = g_raid_taste, .destroy_geom = g_raid_destroy_geom, .init = g_raid_init, .fini = g_raid_fini }; static void g_raid_destroy_provider(struct g_raid_volume *vol); static int g_raid_update_disk(struct g_raid_disk *disk, u_int event); static int g_raid_update_subdisk(struct g_raid_subdisk *subdisk, u_int event); static int g_raid_update_volume(struct g_raid_volume *vol, u_int event); static int g_raid_update_node(struct g_raid_softc *sc, u_int event); static void g_raid_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static void g_raid_start(struct bio *bp); static void g_raid_start_request(struct bio *bp); 
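/*
 * The CTLFLAG_RWTUN knobs declared above surface as kern.geom.raid.*
 * sysctls (and as loader tunables of the same names).  A small userland
 * sketch that reads and bumps kern.geom.raid.debug with sysctlbyname(3);
 * it assumes the geom_raid module is loaded so the node exists, and that
 * the write is attempted with sufficient privilege.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int debug;
	size_t len = sizeof(debug);

	if (sysctlbyname("kern.geom.raid.debug", &debug, &len, NULL, 0) != 0) {
		perror("kern.geom.raid.debug");	/* module not loaded? */
		return (1);
	}
	printf("current debug level: %u\n", debug);

	/* Raising the level needs root; failure is reported, not fatal. */
	debug = 1;
	if (sysctlbyname("kern.geom.raid.debug", NULL, NULL,
	    &debug, sizeof(debug)) != 0)
		perror("set kern.geom.raid.debug");
	return (0);
}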
static void g_raid_disk_done(struct bio *bp); static void g_raid_poll(struct g_raid_softc *sc); static const char * g_raid_node_event2str(int event) { switch (event) { case G_RAID_NODE_E_WAKE: return ("WAKE"); case G_RAID_NODE_E_START: return ("START"); default: return ("INVALID"); } } const char * g_raid_disk_state2str(int state) { switch (state) { case G_RAID_DISK_S_NONE: return ("NONE"); case G_RAID_DISK_S_OFFLINE: return ("OFFLINE"); case G_RAID_DISK_S_DISABLED: return ("DISABLED"); case G_RAID_DISK_S_FAILED: return ("FAILED"); case G_RAID_DISK_S_STALE_FAILED: return ("STALE_FAILED"); case G_RAID_DISK_S_SPARE: return ("SPARE"); case G_RAID_DISK_S_STALE: return ("STALE"); case G_RAID_DISK_S_ACTIVE: return ("ACTIVE"); default: return ("INVALID"); } } static const char * g_raid_disk_event2str(int event) { switch (event) { case G_RAID_DISK_E_DISCONNECTED: return ("DISCONNECTED"); default: return ("INVALID"); } } const char * g_raid_subdisk_state2str(int state) { switch (state) { case G_RAID_SUBDISK_S_NONE: return ("NONE"); case G_RAID_SUBDISK_S_FAILED: return ("FAILED"); case G_RAID_SUBDISK_S_NEW: return ("NEW"); case G_RAID_SUBDISK_S_REBUILD: return ("REBUILD"); case G_RAID_SUBDISK_S_UNINITIALIZED: return ("UNINITIALIZED"); case G_RAID_SUBDISK_S_STALE: return ("STALE"); case G_RAID_SUBDISK_S_RESYNC: return ("RESYNC"); case G_RAID_SUBDISK_S_ACTIVE: return ("ACTIVE"); default: return ("INVALID"); } } static const char * g_raid_subdisk_event2str(int event) { switch (event) { case G_RAID_SUBDISK_E_NEW: return ("NEW"); case G_RAID_SUBDISK_E_FAILED: return ("FAILED"); case G_RAID_SUBDISK_E_DISCONNECTED: return ("DISCONNECTED"); default: return ("INVALID"); } } const char * g_raid_volume_state2str(int state) { switch (state) { case G_RAID_VOLUME_S_STARTING: return ("STARTING"); case G_RAID_VOLUME_S_BROKEN: return ("BROKEN"); case G_RAID_VOLUME_S_DEGRADED: return ("DEGRADED"); case G_RAID_VOLUME_S_SUBOPTIMAL: return ("SUBOPTIMAL"); case G_RAID_VOLUME_S_OPTIMAL: return ("OPTIMAL"); case G_RAID_VOLUME_S_UNSUPPORTED: return ("UNSUPPORTED"); case G_RAID_VOLUME_S_STOPPED: return ("STOPPED"); default: return ("INVALID"); } } static const char * g_raid_volume_event2str(int event) { switch (event) { case G_RAID_VOLUME_E_UP: return ("UP"); case G_RAID_VOLUME_E_DOWN: return ("DOWN"); case G_RAID_VOLUME_E_START: return ("START"); case G_RAID_VOLUME_E_STARTMD: return ("STARTMD"); default: return ("INVALID"); } } const char * g_raid_volume_level2str(int level, int qual) { switch (level) { case G_RAID_VOLUME_RL_RAID0: return ("RAID0"); case G_RAID_VOLUME_RL_RAID1: return ("RAID1"); case G_RAID_VOLUME_RL_RAID3: if (qual == G_RAID_VOLUME_RLQ_R3P0) return ("RAID3-P0"); if (qual == G_RAID_VOLUME_RLQ_R3PN) return ("RAID3-PN"); return ("RAID3"); case G_RAID_VOLUME_RL_RAID4: if (qual == G_RAID_VOLUME_RLQ_R4P0) return ("RAID4-P0"); if (qual == G_RAID_VOLUME_RLQ_R4PN) return ("RAID4-PN"); return ("RAID4"); case G_RAID_VOLUME_RL_RAID5: if (qual == G_RAID_VOLUME_RLQ_R5RA) return ("RAID5-RA"); if (qual == G_RAID_VOLUME_RLQ_R5RS) return ("RAID5-RS"); if (qual == G_RAID_VOLUME_RLQ_R5LA) return ("RAID5-LA"); if (qual == G_RAID_VOLUME_RLQ_R5LS) return ("RAID5-LS"); return ("RAID5"); case G_RAID_VOLUME_RL_RAID6: if (qual == G_RAID_VOLUME_RLQ_R6RA) return ("RAID6-RA"); if (qual == G_RAID_VOLUME_RLQ_R6RS) return ("RAID6-RS"); if (qual == G_RAID_VOLUME_RLQ_R6LA) return ("RAID6-LA"); if (qual == G_RAID_VOLUME_RLQ_R6LS) return ("RAID6-LS"); return ("RAID6"); case G_RAID_VOLUME_RL_RAIDMDF: if (qual == G_RAID_VOLUME_RLQ_RMDFRA) 
return ("RAIDMDF-RA"); if (qual == G_RAID_VOLUME_RLQ_RMDFRS) return ("RAIDMDF-RS"); if (qual == G_RAID_VOLUME_RLQ_RMDFLA) return ("RAIDMDF-LA"); if (qual == G_RAID_VOLUME_RLQ_RMDFLS) return ("RAIDMDF-LS"); return ("RAIDMDF"); case G_RAID_VOLUME_RL_RAID1E: if (qual == G_RAID_VOLUME_RLQ_R1EA) return ("RAID1E-A"); if (qual == G_RAID_VOLUME_RLQ_R1EO) return ("RAID1E-O"); return ("RAID1E"); case G_RAID_VOLUME_RL_SINGLE: return ("SINGLE"); case G_RAID_VOLUME_RL_CONCAT: return ("CONCAT"); case G_RAID_VOLUME_RL_RAID5E: if (qual == G_RAID_VOLUME_RLQ_R5ERA) return ("RAID5E-RA"); if (qual == G_RAID_VOLUME_RLQ_R5ERS) return ("RAID5E-RS"); if (qual == G_RAID_VOLUME_RLQ_R5ELA) return ("RAID5E-LA"); if (qual == G_RAID_VOLUME_RLQ_R5ELS) return ("RAID5E-LS"); return ("RAID5E"); case G_RAID_VOLUME_RL_RAID5EE: if (qual == G_RAID_VOLUME_RLQ_R5EERA) return ("RAID5EE-RA"); if (qual == G_RAID_VOLUME_RLQ_R5EERS) return ("RAID5EE-RS"); if (qual == G_RAID_VOLUME_RLQ_R5EELA) return ("RAID5EE-LA"); if (qual == G_RAID_VOLUME_RLQ_R5EELS) return ("RAID5EE-LS"); return ("RAID5EE"); case G_RAID_VOLUME_RL_RAID5R: if (qual == G_RAID_VOLUME_RLQ_R5RRA) return ("RAID5R-RA"); if (qual == G_RAID_VOLUME_RLQ_R5RRS) return ("RAID5R-RS"); if (qual == G_RAID_VOLUME_RLQ_R5RLA) return ("RAID5R-LA"); if (qual == G_RAID_VOLUME_RLQ_R5RLS) return ("RAID5R-LS"); return ("RAID5E"); default: return ("UNKNOWN"); } } int g_raid_volume_str2level(const char *str, int *level, int *qual) { *level = G_RAID_VOLUME_RL_UNKNOWN; *qual = G_RAID_VOLUME_RLQ_NONE; if (strcasecmp(str, "RAID0") == 0) *level = G_RAID_VOLUME_RL_RAID0; else if (strcasecmp(str, "RAID1") == 0) *level = G_RAID_VOLUME_RL_RAID1; else if (strcasecmp(str, "RAID3-P0") == 0) { *level = G_RAID_VOLUME_RL_RAID3; *qual = G_RAID_VOLUME_RLQ_R3P0; } else if (strcasecmp(str, "RAID3-PN") == 0 || strcasecmp(str, "RAID3") == 0) { *level = G_RAID_VOLUME_RL_RAID3; *qual = G_RAID_VOLUME_RLQ_R3PN; } else if (strcasecmp(str, "RAID4-P0") == 0) { *level = G_RAID_VOLUME_RL_RAID4; *qual = G_RAID_VOLUME_RLQ_R4P0; } else if (strcasecmp(str, "RAID4-PN") == 0 || strcasecmp(str, "RAID4") == 0) { *level = G_RAID_VOLUME_RL_RAID4; *qual = G_RAID_VOLUME_RLQ_R4PN; } else if (strcasecmp(str, "RAID5-RA") == 0) { *level = G_RAID_VOLUME_RL_RAID5; *qual = G_RAID_VOLUME_RLQ_R5RA; } else if (strcasecmp(str, "RAID5-RS") == 0) { *level = G_RAID_VOLUME_RL_RAID5; *qual = G_RAID_VOLUME_RLQ_R5RS; } else if (strcasecmp(str, "RAID5") == 0 || strcasecmp(str, "RAID5-LA") == 0) { *level = G_RAID_VOLUME_RL_RAID5; *qual = G_RAID_VOLUME_RLQ_R5LA; } else if (strcasecmp(str, "RAID5-LS") == 0) { *level = G_RAID_VOLUME_RL_RAID5; *qual = G_RAID_VOLUME_RLQ_R5LS; } else if (strcasecmp(str, "RAID6-RA") == 0) { *level = G_RAID_VOLUME_RL_RAID6; *qual = G_RAID_VOLUME_RLQ_R6RA; } else if (strcasecmp(str, "RAID6-RS") == 0) { *level = G_RAID_VOLUME_RL_RAID6; *qual = G_RAID_VOLUME_RLQ_R6RS; } else if (strcasecmp(str, "RAID6") == 0 || strcasecmp(str, "RAID6-LA") == 0) { *level = G_RAID_VOLUME_RL_RAID6; *qual = G_RAID_VOLUME_RLQ_R6LA; } else if (strcasecmp(str, "RAID6-LS") == 0) { *level = G_RAID_VOLUME_RL_RAID6; *qual = G_RAID_VOLUME_RLQ_R6LS; } else if (strcasecmp(str, "RAIDMDF-RA") == 0) { *level = G_RAID_VOLUME_RL_RAIDMDF; *qual = G_RAID_VOLUME_RLQ_RMDFRA; } else if (strcasecmp(str, "RAIDMDF-RS") == 0) { *level = G_RAID_VOLUME_RL_RAIDMDF; *qual = G_RAID_VOLUME_RLQ_RMDFRS; } else if (strcasecmp(str, "RAIDMDF") == 0 || strcasecmp(str, "RAIDMDF-LA") == 0) { *level = G_RAID_VOLUME_RL_RAIDMDF; *qual = G_RAID_VOLUME_RLQ_RMDFLA; } else if (strcasecmp(str, 
"RAIDMDF-LS") == 0) { *level = G_RAID_VOLUME_RL_RAIDMDF; *qual = G_RAID_VOLUME_RLQ_RMDFLS; } else if (strcasecmp(str, "RAID10") == 0 || strcasecmp(str, "RAID1E") == 0 || strcasecmp(str, "RAID1E-A") == 0) { *level = G_RAID_VOLUME_RL_RAID1E; *qual = G_RAID_VOLUME_RLQ_R1EA; } else if (strcasecmp(str, "RAID1E-O") == 0) { *level = G_RAID_VOLUME_RL_RAID1E; *qual = G_RAID_VOLUME_RLQ_R1EO; } else if (strcasecmp(str, "SINGLE") == 0) *level = G_RAID_VOLUME_RL_SINGLE; else if (strcasecmp(str, "CONCAT") == 0) *level = G_RAID_VOLUME_RL_CONCAT; else if (strcasecmp(str, "RAID5E-RA") == 0) { *level = G_RAID_VOLUME_RL_RAID5E; *qual = G_RAID_VOLUME_RLQ_R5ERA; } else if (strcasecmp(str, "RAID5E-RS") == 0) { *level = G_RAID_VOLUME_RL_RAID5E; *qual = G_RAID_VOLUME_RLQ_R5ERS; } else if (strcasecmp(str, "RAID5E") == 0 || strcasecmp(str, "RAID5E-LA") == 0) { *level = G_RAID_VOLUME_RL_RAID5E; *qual = G_RAID_VOLUME_RLQ_R5ELA; } else if (strcasecmp(str, "RAID5E-LS") == 0) { *level = G_RAID_VOLUME_RL_RAID5E; *qual = G_RAID_VOLUME_RLQ_R5ELS; } else if (strcasecmp(str, "RAID5EE-RA") == 0) { *level = G_RAID_VOLUME_RL_RAID5EE; *qual = G_RAID_VOLUME_RLQ_R5EERA; } else if (strcasecmp(str, "RAID5EE-RS") == 0) { *level = G_RAID_VOLUME_RL_RAID5EE; *qual = G_RAID_VOLUME_RLQ_R5EERS; } else if (strcasecmp(str, "RAID5EE") == 0 || strcasecmp(str, "RAID5EE-LA") == 0) { *level = G_RAID_VOLUME_RL_RAID5EE; *qual = G_RAID_VOLUME_RLQ_R5EELA; } else if (strcasecmp(str, "RAID5EE-LS") == 0) { *level = G_RAID_VOLUME_RL_RAID5EE; *qual = G_RAID_VOLUME_RLQ_R5EELS; } else if (strcasecmp(str, "RAID5R-RA") == 0) { *level = G_RAID_VOLUME_RL_RAID5R; *qual = G_RAID_VOLUME_RLQ_R5RRA; } else if (strcasecmp(str, "RAID5R-RS") == 0) { *level = G_RAID_VOLUME_RL_RAID5R; *qual = G_RAID_VOLUME_RLQ_R5RRS; } else if (strcasecmp(str, "RAID5R") == 0 || strcasecmp(str, "RAID5R-LA") == 0) { *level = G_RAID_VOLUME_RL_RAID5R; *qual = G_RAID_VOLUME_RLQ_R5RLA; } else if (strcasecmp(str, "RAID5R-LS") == 0) { *level = G_RAID_VOLUME_RL_RAID5R; *qual = G_RAID_VOLUME_RLQ_R5RLS; } else return (-1); return (0); } const char * g_raid_get_diskname(struct g_raid_disk *disk) { if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) return ("[unknown]"); return (disk->d_consumer->provider->name); } void g_raid_get_disk_info(struct g_raid_disk *disk) { struct g_consumer *cp = disk->d_consumer; int error, len; /* Read kernel dumping information. */ disk->d_kd.offset = 0; disk->d_kd.length = OFF_MAX; len = sizeof(disk->d_kd); error = g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd); if (error) disk->d_kd.di.dumper = NULL; if (disk->d_kd.di.dumper == NULL) G_RAID_DEBUG1(2, disk->d_softc, "Dumping not supported by %s: %d.", cp->provider->name, error); /* Read BIO_DELETE support. 
*/ error = g_getattr("GEOM::candelete", cp, &disk->d_candelete); if (error) disk->d_candelete = 0; if (!disk->d_candelete) G_RAID_DEBUG1(2, disk->d_softc, "BIO_DELETE not supported by %s: %d.", cp->provider->name, error); } void g_raid_report_disk_state(struct g_raid_disk *disk) { struct g_raid_subdisk *sd; int len, state; uint32_t s; if (disk->d_consumer == NULL) return; if (disk->d_state == G_RAID_DISK_S_DISABLED) { s = G_STATE_ACTIVE; /* XXX */ } else if (disk->d_state == G_RAID_DISK_S_FAILED || disk->d_state == G_RAID_DISK_S_STALE_FAILED) { s = G_STATE_FAILED; } else { state = G_RAID_SUBDISK_S_ACTIVE; TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { if (sd->sd_state < state) state = sd->sd_state; } if (state == G_RAID_SUBDISK_S_FAILED) s = G_STATE_FAILED; else if (state == G_RAID_SUBDISK_S_NEW || state == G_RAID_SUBDISK_S_REBUILD) s = G_STATE_REBUILD; else if (state == G_RAID_SUBDISK_S_STALE || state == G_RAID_SUBDISK_S_RESYNC) s = G_STATE_RESYNC; else s = G_STATE_ACTIVE; } len = sizeof(s); g_io_getattr("GEOM::setstate", disk->d_consumer, &len, &s); G_RAID_DEBUG1(2, disk->d_softc, "Disk %s state reported as %d.", g_raid_get_diskname(disk), s); } void g_raid_change_disk_state(struct g_raid_disk *disk, int state) { G_RAID_DEBUG1(0, disk->d_softc, "Disk %s state changed from %s to %s.", g_raid_get_diskname(disk), g_raid_disk_state2str(disk->d_state), g_raid_disk_state2str(state)); disk->d_state = state; g_raid_report_disk_state(disk); } void g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state) { G_RAID_DEBUG1(0, sd->sd_softc, "Subdisk %s:%d-%s state changed from %s to %s.", sd->sd_volume->v_name, sd->sd_pos, sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]", g_raid_subdisk_state2str(sd->sd_state), g_raid_subdisk_state2str(state)); sd->sd_state = state; if (sd->sd_disk) g_raid_report_disk_state(sd->sd_disk); } void g_raid_change_volume_state(struct g_raid_volume *vol, int state) { G_RAID_DEBUG1(0, vol->v_softc, "Volume %s state changed from %s to %s.", vol->v_name, g_raid_volume_state2str(vol->v_state), g_raid_volume_state2str(state)); vol->v_state = state; } /* * --- Events handling functions --- * Events in geom_raid are used to maintain subdisks and volumes status * from one thread to simplify locking. */ static void g_raid_event_free(struct g_raid_event *ep) { free(ep, M_RAID); } int g_raid_event_send(void *arg, int event, int flags) { struct g_raid_softc *sc; struct g_raid_event *ep; int error; if ((flags & G_RAID_EVENT_VOLUME) != 0) { sc = ((struct g_raid_volume *)arg)->v_softc; } else if ((flags & G_RAID_EVENT_DISK) != 0) { sc = ((struct g_raid_disk *)arg)->d_softc; } else if ((flags & G_RAID_EVENT_SUBDISK) != 0) { sc = ((struct g_raid_subdisk *)arg)->sd_softc; } else { sc = arg; } ep = malloc(sizeof(*ep), M_RAID, sx_xlocked(&sc->sc_lock) ? M_WAITOK : M_NOWAIT); if (ep == NULL) return (ENOMEM); ep->e_tgt = arg; ep->e_event = event; ep->e_flags = flags; ep->e_error = 0; G_RAID_DEBUG1(4, sc, "Sending event %p. 
Waking up %p.", ep, sc); mtx_lock(&sc->sc_queue_mtx); TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); if ((flags & G_RAID_EVENT_WAIT) == 0) return (0); sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(4, sc, "Sleeping on %p.", ep); sx_xunlock(&sc->sc_lock); while ((ep->e_flags & G_RAID_EVENT_DONE) == 0) { mtx_lock(&sc->sc_queue_mtx); MSLEEP(error, ep, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:event", hz * 5); } error = ep->e_error; g_raid_event_free(ep); sx_xlock(&sc->sc_lock); return (error); } static void g_raid_event_cancel(struct g_raid_softc *sc, void *tgt) { struct g_raid_event *ep, *tmpep; sx_assert(&sc->sc_lock, SX_XLOCKED); mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { if (ep->e_tgt != tgt) continue; TAILQ_REMOVE(&sc->sc_events, ep, e_next); if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0) g_raid_event_free(ep); else { ep->e_error = ECANCELED; wakeup(ep); } } mtx_unlock(&sc->sc_queue_mtx); } static int g_raid_event_check(struct g_raid_softc *sc, void *tgt) { struct g_raid_event *ep; int res = 0; sx_assert(&sc->sc_lock, SX_XLOCKED); mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH(ep, &sc->sc_events, e_next) { if (ep->e_tgt != tgt) continue; res = 1; break; } mtx_unlock(&sc->sc_queue_mtx); return (res); } /* * Return the number of disks in given state. * If state is equal to -1, count all connected disks. */ u_int g_raid_ndisks(struct g_raid_softc *sc, int state) { struct g_raid_disk *disk; u_int n; sx_assert(&sc->sc_lock, SX_LOCKED); n = 0; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == state || state == -1) n++; } return (n); } /* * Return the number of subdisks in given state. * If state is equal to -1, count all connected disks. */ u_int g_raid_nsubdisks(struct g_raid_volume *vol, int state) { struct g_raid_subdisk *subdisk; struct g_raid_softc *sc; u_int i, n ; sc = vol->v_softc; sx_assert(&sc->sc_lock, SX_LOCKED); n = 0; for (i = 0; i < vol->v_disks_count; i++) { subdisk = &vol->v_subdisks[i]; if ((state == -1 && subdisk->sd_state != G_RAID_SUBDISK_S_NONE) || subdisk->sd_state == state) n++; } return (n); } /* * Return the first subdisk in given state. * If state is equal to -1, then the first connected disks. 
*/ struct g_raid_subdisk * g_raid_get_subdisk(struct g_raid_volume *vol, int state) { struct g_raid_subdisk *sd; struct g_raid_softc *sc; u_int i; sc = vol->v_softc; sx_assert(&sc->sc_lock, SX_LOCKED); for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if ((state == -1 && sd->sd_state != G_RAID_SUBDISK_S_NONE) || sd->sd_state == state) return (sd); } return (NULL); } struct g_consumer * g_raid_open_consumer(struct g_raid_softc *sc, const char *name) { struct g_consumer *cp; struct g_provider *pp; g_topology_assert(); if (strncmp(name, "/dev/", 5) == 0) name += 5; pp = g_provider_by_name(name); if (pp == NULL) return (NULL); cp = g_new_consumer(sc->sc_geom); cp->flags |= G_CF_DIRECT_RECEIVE; if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); return (NULL); } if (g_access(cp, 1, 1, 1) != 0) { g_detach(cp); g_destroy_consumer(cp); return (NULL); } return (cp); } static u_int g_raid_nrequests(struct g_raid_softc *sc, struct g_consumer *cp) { struct bio *bp; u_int nreqs = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_from == cp) nreqs++; } mtx_unlock(&sc->sc_queue_mtx); return (nreqs); } u_int g_raid_nopens(struct g_raid_softc *sc) { struct g_raid_volume *vol; u_int opens; opens = 0; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (vol->v_provider_open != 0) opens++; } return (opens); } static int g_raid_consumer_is_busy(struct g_raid_softc *sc, struct g_consumer *cp) { if (cp->index > 0) { G_RAID_DEBUG1(2, sc, "I/O requests for %s exist, can't destroy it now.", cp->provider->name); return (1); } if (g_raid_nrequests(sc, cp) > 0) { G_RAID_DEBUG1(2, sc, "I/O requests for %s in queue, can't destroy it now.", cp->provider->name); return (1); } return (0); } static void g_raid_destroy_consumer(void *arg, int flags __unused) { struct g_consumer *cp; g_topology_assert(); cp = arg; G_RAID_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); g_detach(cp); g_destroy_consumer(cp); } void g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp) { struct g_provider *pp; int retaste_wait; g_topology_assert_not(); g_topology_lock(); cp->private = NULL; if (g_raid_consumer_is_busy(sc, cp)) goto out; pp = cp->provider; retaste_wait = 0; if (cp->acw == 1) { if ((pp->geom->flags & G_GEOM_WITHER) == 0) retaste_wait = 1; } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); if (retaste_wait) { /* * After retaste event was send (inside g_access()), we can send * event to detach and destroy consumer. * A class, which has consumer to the given provider connected * will not receive retaste event for the provider. * This is the way how I ignore retaste events when I close * consumers opened for write: I detach and destroy consumer * after retaste event is sent. 
*/ g_post_event(g_raid_destroy_consumer, cp, M_WAITOK, NULL); goto out; } G_RAID_DEBUG(1, "Consumer %s destroyed.", pp->name); g_detach(cp); g_destroy_consumer(cp); out: g_topology_unlock(); } static void g_raid_orphan(struct g_consumer *cp) { struct g_raid_disk *disk; g_topology_assert(); disk = cp->private; if (disk == NULL) return; g_raid_event_send(disk, G_RAID_DISK_E_DISCONNECTED, G_RAID_EVENT_DISK); } static void g_raid_clean(struct g_raid_volume *vol, int acw) { struct g_raid_softc *sc; int timeout; sc = vol->v_softc; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); // if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0) // return; if (!vol->v_dirty) return; if (vol->v_writes > 0) return; if (acw > 0 || (acw == -1 && vol->v_provider != NULL && vol->v_provider->acw > 0)) { timeout = g_raid_clean_time - (time_uptime - vol->v_last_write); if (!g_raid_shutdown && timeout > 0) return; } vol->v_dirty = 0; G_RAID_DEBUG1(1, sc, "Volume %s marked as clean.", vol->v_name); g_raid_write_metadata(sc, vol, NULL, NULL); } static void g_raid_dirty(struct g_raid_volume *vol) { struct g_raid_softc *sc; sc = vol->v_softc; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); // if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0) // return; vol->v_dirty = 1; G_RAID_DEBUG1(1, sc, "Volume %s marked as dirty.", vol->v_name); g_raid_write_metadata(sc, vol, NULL, NULL); } void g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct bio_queue_head queue; struct bio *cbp; int i; vol = tr->tro_volume; /* * Allocate all bios before sending any request, so we can return * ENOMEM in nice and clean way. */ bioq_init(&queue); for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state == G_RAID_SUBDISK_S_NONE || sd->sd_state == G_RAID_SUBDISK_S_FAILED) continue; cbp = g_clone_bio(bp); if (cbp == NULL) goto failure; cbp->bio_caller1 = sd; bioq_insert_tail(&queue, cbp); } while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); } static void g_raid_tr_kerneldump_common_done(struct bio *bp) { bp->bio_flags |= BIO_DONE; } int g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct bio bp; vol = tr->tro_volume; sc = vol->v_softc; g_reset_bio(&bp); bp.bio_cmd = BIO_WRITE; bp.bio_done = g_raid_tr_kerneldump_common_done; bp.bio_attribute = NULL; bp.bio_offset = offset; bp.bio_length = length; bp.bio_data = virtual; bp.bio_to = vol->v_provider; g_raid_start(&bp); while (!(bp.bio_flags & BIO_DONE)) { G_RAID_DEBUG1(4, sc, "Poll..."); g_raid_poll(sc); DELAY(10); } return (bp.bio_error != 0 ? 
EIO : 0); } static int g_raid_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct g_raid_volume *vol; int error; vol = (struct g_raid_volume *)arg; G_RAID_DEBUG1(3, vol->v_softc, "Dumping at off %llu len %llu.", (long long unsigned)offset, (long long unsigned)length); error = G_RAID_TR_KERNELDUMP(vol->v_tr, virtual, physical, offset, length); return (error); } static void g_raid_kerneldump(struct g_raid_softc *sc, struct bio *bp) { struct g_kerneldump *gkd; struct g_provider *pp; struct g_raid_volume *vol; gkd = (struct g_kerneldump*)bp->bio_data; pp = bp->bio_to; vol = pp->private; g_trace(G_T_TOPOLOGY, "g_raid_kerneldump(%s, %jd, %jd)", pp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length); gkd->di.dumper = g_raid_dump; gkd->di.priv = vol; gkd->di.blocksize = vol->v_sectorsize; gkd->di.maxiosize = DFLTPHYS; gkd->di.mediaoffset = gkd->offset; if ((gkd->offset + gkd->length) > vol->v_mediasize) gkd->length = vol->v_mediasize - gkd->offset; gkd->di.mediasize = gkd->length; g_io_deliver(bp, 0); } static void g_raid_candelete(struct g_raid_softc *sc, struct bio *bp) { struct g_provider *pp; struct g_raid_volume *vol; struct g_raid_subdisk *sd; int i, val; pp = bp->bio_to; vol = pp->private; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state == G_RAID_SUBDISK_S_NONE) continue; if (sd->sd_disk->d_candelete) break; } val = i < vol->v_disks_count; g_handleattr(bp, "GEOM::candelete", &val, sizeof(val)); } static void g_raid_start(struct bio *bp) { struct g_raid_softc *sc; sc = bp->bio_to->geom->softc; /* * If sc == NULL or there are no valid disks, provider's error * should be set and g_raid_start() should not be called at all. */ // KASSERT(sc != NULL && sc->sc_state == G_RAID_VOLUME_S_RUNNING, // ("Provider's error should be set (error=%d)(mirror=%s).", // bp->bio_to->error, bp->bio_to->name)); G_RAID_LOGREQ(3, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: break; case BIO_GETATTR: if (!strcmp(bp->bio_attribute, "GEOM::candelete")) g_raid_candelete(sc, bp); else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump")) g_raid_kerneldump(sc, bp); else g_io_deliver(bp, EOPNOTSUPP); return; default: g_io_deliver(bp, EOPNOTSUPP); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); if (!dumping) { G_RAID_DEBUG1(4, sc, "Waking up %p.", sc); wakeup(sc); } } static int g_raid_bio_overlaps(const struct bio *bp, off_t lstart, off_t len) { /* * 5 cases: * (1) bp entirely below NO * (2) bp entirely above NO * (3) bp start below, but end in range YES * (4) bp entirely within YES * (5) bp starts within, ends above YES * * lock range 10-19 (offset 10 length 10) * (1) 1-5: first if kicks it out * (2) 30-35: second if kicks it out * (3) 5-15: passes both ifs * (4) 12-14: passes both ifs * (5) 19-20: passes both */ off_t lend = lstart + len - 1; off_t bstart = bp->bio_offset; off_t bend = bp->bio_offset + bp->bio_length - 1; if (bend < lstart) return (0); if (lend < bstart) return (0); return (1); } static int g_raid_is_in_locked_range(struct g_raid_volume *vol, const struct bio *bp) { struct g_raid_lock *lp; sx_assert(&vol->v_softc->sc_lock, SX_LOCKED); LIST_FOREACH(lp, &vol->v_locks, l_next) { if (g_raid_bio_overlaps(bp, lp->l_offset, lp->l_length)) return (1); } return (0); } static void g_raid_start_request(struct bio *bp) { struct g_raid_softc *sc; struct g_raid_volume *vol; sc = bp->bio_to->geom->softc; 
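
/*
 * [Editor's aside -- illustrative sketch only, not part of this commit.]
 * g_raid_bio_overlaps() above treats both the lock range and the bio as
 * closed intervals: two intervals can only miss each other when one ends
 * before the other begins.  The stand-alone, user-space sketch below
 * replays the five cases listed in that comment (lock range 10-19); the
 * helper name "overlaps" is made up for the example.
 */
#if 0	/* editor's sketch; never compiled as part of g_raid */
#include <stdio.h>

static int
overlaps(long long bstart, long long bend, long long lstart, long long lend)
{

	if (bend < lstart)		/* bio entirely below the locked range */
		return (0);
	if (lend < bstart)		/* bio entirely above the locked range */
		return (0);
	return (1);			/* everything else overlaps */
}

int
main(void)
{

	printf("%d %d %d %d %d\n",
	    overlaps(1, 5, 10, 19),	/* (1) entirely below   -> 0 */
	    overlaps(30, 35, 10, 19),	/* (2) entirely above   -> 0 */
	    overlaps(5, 15, 10, 19),	/* (3) straddles start  -> 1 */
	    overlaps(12, 14, 10, 19),	/* (4) fully inside     -> 1 */
	    overlaps(19, 20, 10, 19));	/* (5) straddles end    -> 1 */
	return (0);
}
#endif
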
sx_assert(&sc->sc_lock, SX_LOCKED); vol = bp->bio_to->private; /* * Check to see if this item is in a locked range. If so, * queue it to our locked queue and return. We'll requeue * it when the range is unlocked. Internal I/O for the * rebuild/rescan/recovery process is excluded from this * check so we can actually do the recovery. */ if (!(bp->bio_cflags & G_RAID_BIO_FLAG_SPECIAL) && g_raid_is_in_locked_range(vol, bp)) { G_RAID_LOGREQ(3, bp, "Defer request."); bioq_insert_tail(&vol->v_locked, bp); return; } /* * If we're actually going to do the write/delete, then * update the idle stats for the volume. */ if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) { if (!vol->v_dirty) g_raid_dirty(vol); vol->v_writes++; } /* * Put request onto inflight queue, so we can check if new * synchronization requests don't collide with it. Then tell * the transformation layer to start the I/O. */ bioq_insert_tail(&vol->v_inflight, bp); G_RAID_LOGREQ(4, bp, "Request started"); G_RAID_TR_IOSTART(vol->v_tr, bp); } static void g_raid_finish_with_locked_ranges(struct g_raid_volume *vol, struct bio *bp) { off_t off, len; struct bio *nbp; struct g_raid_lock *lp; vol->v_pending_lock = 0; LIST_FOREACH(lp, &vol->v_locks, l_next) { if (lp->l_pending) { off = lp->l_offset; len = lp->l_length; lp->l_pending = 0; TAILQ_FOREACH(nbp, &vol->v_inflight.queue, bio_queue) { if (g_raid_bio_overlaps(nbp, off, len)) lp->l_pending++; } if (lp->l_pending) { vol->v_pending_lock = 1; G_RAID_DEBUG1(4, vol->v_softc, "Deferred lock(%jd, %jd) has %d pending", (intmax_t)off, (intmax_t)(off + len), lp->l_pending); continue; } G_RAID_DEBUG1(4, vol->v_softc, "Deferred lock of %jd to %jd completed", (intmax_t)off, (intmax_t)(off + len)); G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg); } } } void g_raid_iodone(struct bio *bp, int error) { struct g_raid_softc *sc; struct g_raid_volume *vol; sc = bp->bio_to->geom->softc; sx_assert(&sc->sc_lock, SX_LOCKED); vol = bp->bio_to->private; G_RAID_LOGREQ(3, bp, "Request done: %d.", error); /* Update stats if we done write/delete. */ if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) { vol->v_writes--; vol->v_last_write = time_uptime; } bioq_remove(&vol->v_inflight, bp); if (vol->v_pending_lock && g_raid_is_in_locked_range(vol, bp)) g_raid_finish_with_locked_ranges(vol, bp); getmicrouptime(&vol->v_last_done); g_io_deliver(bp, error); } int g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len, struct bio *ignore, void *argp) { struct g_raid_softc *sc; struct g_raid_lock *lp; struct bio *bp; sc = vol->v_softc; lp = malloc(sizeof(*lp), M_RAID, M_WAITOK | M_ZERO); LIST_INSERT_HEAD(&vol->v_locks, lp, l_next); lp->l_offset = off; lp->l_length = len; lp->l_callback_arg = argp; lp->l_pending = 0; TAILQ_FOREACH(bp, &vol->v_inflight.queue, bio_queue) { if (bp != ignore && g_raid_bio_overlaps(bp, off, len)) lp->l_pending++; } /* * If there are any writes that are pending, we return EBUSY. All * callers will have to wait until all pending writes clear. 
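 */

/*
 * [Editor's aside -- a hedged, hypothetical sketch, not part of this
 * commit.]  A transformation module that needs exclusive access to a
 * region (for example while rebuilding it) uses the pair
 * g_raid_lock_range()/g_raid_unlock_range().  On EBUSY the lock has been
 * recorded but overlapping writes are still in flight, and the module is
 * notified later through its G_RAID_TR_LOCKED() callback once
 * g_raid_finish_with_locked_ranges() sees the range drain.  The function
 * name below is invented for the example.
 */
#if 0	/* editor's sketch; never compiled as part of g_raid */
static void
demo_lock_rebuild_chunk(struct g_raid_volume *vol, off_t off, off_t len)
{

	/*
	 * The actual copy is driven from the G_RAID_TR_LOCKED()
	 * callback: it runs synchronously when this returns 0, or
	 * later, once the overlapping in-flight writes drain, when
	 * this returns EBUSY.  Either way the range stays locked until
	 * g_raid_unlock_range(vol, off, len) is called.
	 */
	(void)g_raid_lock_range(vol, off, len, NULL, vol);
}
#endif

/*
 * (End of editor's aside; the original EBUSY path continues.)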
*/ if (lp->l_pending > 0) { vol->v_pending_lock = 1; G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd deferred %d pend", (intmax_t)off, (intmax_t)(off+len), lp->l_pending); return (EBUSY); } G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd", (intmax_t)off, (intmax_t)(off+len)); G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg); return (0); } int g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len) { struct g_raid_lock *lp; struct g_raid_softc *sc; struct bio *bp; sc = vol->v_softc; LIST_FOREACH(lp, &vol->v_locks, l_next) { if (lp->l_offset == off && lp->l_length == len) { LIST_REMOVE(lp, l_next); /* XXX * Right now we just put them all back on the queue * and hope for the best. We hope this because any * locked ranges will go right back on this list * when the worker thread runs. * XXX */ G_RAID_DEBUG1(4, sc, "Unlocked %jd to %jd", (intmax_t)lp->l_offset, (intmax_t)(lp->l_offset+lp->l_length)); mtx_lock(&sc->sc_queue_mtx); while ((bp = bioq_takefirst(&vol->v_locked)) != NULL) bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); free(lp, M_RAID); return (0); } } return (EINVAL); } void g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp) { struct g_consumer *cp; struct g_raid_disk *disk, *tdisk; bp->bio_caller1 = sd; /* * Make sure that the disk is present. Generally it is a task of * transformation layers to not send requests to absent disks, but * it is better to be safe and report situation then sorry. */ if (sd->sd_disk == NULL) { G_RAID_LOGREQ(0, bp, "Warning! I/O request to an absent disk!"); nodisk: bp->bio_from = NULL; bp->bio_to = NULL; bp->bio_error = ENXIO; g_raid_disk_done(bp); return; } disk = sd->sd_disk; if (disk->d_state != G_RAID_DISK_S_ACTIVE && disk->d_state != G_RAID_DISK_S_FAILED) { G_RAID_LOGREQ(0, bp, "Warning! I/O request to a disk in a " "wrong state (%s)!", g_raid_disk_state2str(disk->d_state)); goto nodisk; } cp = disk->d_consumer; bp->bio_from = cp; bp->bio_to = cp->provider; cp->index++; /* Update average disks load. 
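 */

/*
 * [Editor's aside -- illustrative sketch only, not part of this commit.]
 * The per-disk load figure updated in the loop below is an exponentially
 * weighted moving average: each update keeps 7/8 of the previous value
 * and folds in the consumer's current queue depth scaled by
 * G_RAID_SUBDISK_LOAD_SCALE (256).  The user-space sketch shows the same
 * recurrence converging for a steady queue depth of 4.
 */
#if 0	/* editor's sketch; never compiled as part of g_raid */
#include <stdio.h>

#define LOAD_SCALE	256	/* mirrors G_RAID_SUBDISK_LOAD_SCALE */

int
main(void)
{
	unsigned int load = 0;
	int i;

	for (i = 0; i < 100; i++)
		load = (4 * LOAD_SCALE + load * 7) / 8;
	/* Settles just below 4 * 256 = 1024 (integer truncation). */
	printf("%u\n", load);
	return (0);
}
#endif

/*
 * (End of editor's aside; the load-average update follows.)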
*/ TAILQ_FOREACH(tdisk, &sd->sd_softc->sc_disks, d_next) { if (tdisk->d_consumer == NULL) tdisk->d_load = 0; else tdisk->d_load = (tdisk->d_consumer->index * G_RAID_SUBDISK_LOAD_SCALE + tdisk->d_load * 7) / 8; } disk->d_last_offset = bp->bio_offset + bp->bio_length; if (dumping) { G_RAID_LOGREQ(3, bp, "Sending dumping request."); if (bp->bio_cmd == BIO_WRITE) { bp->bio_error = g_raid_subdisk_kerneldump(sd, bp->bio_data, 0, bp->bio_offset, bp->bio_length); } else bp->bio_error = EOPNOTSUPP; g_raid_disk_done(bp); } else { bp->bio_done = g_raid_disk_done; bp->bio_offset += sd->sd_offset; G_RAID_LOGREQ(3, bp, "Sending request."); g_io_request(bp, cp); } } int g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd, void *virtual, vm_offset_t physical, off_t offset, size_t length) { if (sd->sd_disk == NULL) return (ENXIO); if (sd->sd_disk->d_kd.di.dumper == NULL) return (EOPNOTSUPP); return (dump_write(&sd->sd_disk->d_kd.di, virtual, physical, sd->sd_disk->d_kd.di.mediaoffset + sd->sd_offset + offset, length)); } static void g_raid_disk_done(struct bio *bp) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; sd = bp->bio_caller1; sc = sd->sd_softc; mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); if (!dumping) wakeup(sc); } static void g_raid_disk_done_request(struct bio *bp) { struct g_raid_softc *sc; struct g_raid_disk *disk; struct g_raid_subdisk *sd; struct g_raid_volume *vol; g_topology_assert_not(); G_RAID_LOGREQ(3, bp, "Disk request done: %d.", bp->bio_error); sd = bp->bio_caller1; sc = sd->sd_softc; vol = sd->sd_volume; if (bp->bio_from != NULL) { bp->bio_from->index--; disk = bp->bio_from->private; if (disk == NULL) g_raid_kill_consumer(sc, bp->bio_from); } bp->bio_offset -= sd->sd_offset; G_RAID_TR_IODONE(vol->v_tr, sd, bp); } static void g_raid_handle_event(struct g_raid_softc *sc, struct g_raid_event *ep) { if ((ep->e_flags & G_RAID_EVENT_VOLUME) != 0) ep->e_error = g_raid_update_volume(ep->e_tgt, ep->e_event); else if ((ep->e_flags & G_RAID_EVENT_DISK) != 0) ep->e_error = g_raid_update_disk(ep->e_tgt, ep->e_event); else if ((ep->e_flags & G_RAID_EVENT_SUBDISK) != 0) ep->e_error = g_raid_update_subdisk(ep->e_tgt, ep->e_event); else ep->e_error = g_raid_update_node(ep->e_tgt, ep->e_event); if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0) { KASSERT(ep->e_error == 0, ("Error cannot be handled.")); g_raid_event_free(ep); } else { ep->e_flags |= G_RAID_EVENT_DONE; G_RAID_DEBUG1(4, sc, "Waking up %p.", ep); mtx_lock(&sc->sc_queue_mtx); wakeup(ep); mtx_unlock(&sc->sc_queue_mtx); } } /* * Worker thread. */ static void g_raid_worker(void *arg) { struct g_raid_softc *sc; struct g_raid_event *ep; struct g_raid_volume *vol; struct bio *bp; struct timeval now, t; int timeout, rv; sc = arg; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { mtx_lock(&sc->sc_queue_mtx); /* * First take a look at events. * This is important to handle events before any I/O requests. 
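 */

/*
 * [Editor's aside -- a hedged, hypothetical example, not part of this
 * commit.]  g_raid_handle_event() above is the consumer side of the
 * queue filled by g_raid_event_send().  A sender that needs the result
 * passes G_RAID_EVENT_WAIT: g_raid_event_send() then drops sc_lock,
 * sleeps until the worker sets G_RAID_EVENT_DONE, and returns the
 * handler's e_error.  The wrapper name below is invented.
 */
#if 0	/* editor's sketch; never compiled as part of g_raid */
static int
demo_disconnect_disk_sync(struct g_raid_disk *disk)
{

	/* Blocks until the worker thread has processed the event. */
	return (g_raid_event_send(disk, G_RAID_DISK_E_DISCONNECTED,
	    G_RAID_EVENT_DISK | G_RAID_EVENT_WAIT));
}
#endif

/*
 * (End of editor's aside.)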
*/ bp = NULL; vol = NULL; rv = 0; ep = TAILQ_FIRST(&sc->sc_events); if (ep != NULL) TAILQ_REMOVE(&sc->sc_events, ep, e_next); else if ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) ; else { getmicrouptime(&now); t = now; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (bioq_first(&vol->v_inflight) == NULL && vol->v_tr && timevalcmp(&vol->v_last_done, &t, < )) t = vol->v_last_done; } timevalsub(&t, &now); timeout = g_raid_idle_threshold + t.tv_sec * 1000000 + t.tv_usec; if (timeout > 0) { /* * Two steps to avoid overflows at HZ=1000 * and idle timeouts > 2.1s. Some rounding * errors can occur, but they are < 1tick, * which is deemed to be close enough for * this purpose. */ int micpertic = 1000000 / hz; timeout = (timeout + micpertic - 1) / micpertic; sx_xunlock(&sc->sc_lock); MSLEEP(rv, sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "-", timeout); sx_xlock(&sc->sc_lock); goto process; } else rv = EWOULDBLOCK; } mtx_unlock(&sc->sc_queue_mtx); process: if (ep != NULL) { g_raid_handle_event(sc, ep); } else if (bp != NULL) { if (bp->bio_to != NULL && bp->bio_to->geom == sc->sc_geom) g_raid_start_request(bp); else g_raid_disk_done_request(bp); } else if (rv == EWOULDBLOCK) { TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { g_raid_clean(vol, -1); if (bioq_first(&vol->v_inflight) == NULL && vol->v_tr) { t.tv_sec = g_raid_idle_threshold / 1000000; t.tv_usec = g_raid_idle_threshold % 1000000; timevaladd(&t, &vol->v_last_done); getmicrouptime(&now); if (timevalcmp(&t, &now, <= )) { G_RAID_TR_IDLE(vol->v_tr); vol->v_last_done = now; } } } } if (sc->sc_stopping == G_RAID_DESTROY_HARD) g_raid_destroy_node(sc, 1); /* May not return. */ } } static void g_raid_poll(struct g_raid_softc *sc) { struct g_raid_event *ep; struct bio *bp; sx_xlock(&sc->sc_lock); mtx_lock(&sc->sc_queue_mtx); /* * First take a look at events. * This is important to handle events before any I/O requests. */ ep = TAILQ_FIRST(&sc->sc_events); if (ep != NULL) { TAILQ_REMOVE(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_queue_mtx); g_raid_handle_event(sc, ep); goto out; } bp = bioq_takefirst(&sc->sc_queue); if (bp != NULL) { mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_from == NULL || bp->bio_from->geom != sc->sc_geom) g_raid_start_request(bp); else g_raid_disk_done_request(bp); } out: sx_xunlock(&sc->sc_lock); } static void g_raid_launch_provider(struct g_raid_volume *vol) { struct g_raid_disk *disk; struct g_raid_subdisk *sd; struct g_raid_softc *sc; struct g_provider *pp; char name[G_RAID_MAX_VOLUMENAME]; off_t off; int i; sc = vol->v_softc; sx_assert(&sc->sc_lock, SX_LOCKED); g_topology_lock(); /* Try to name provider with volume name. */ snprintf(name, sizeof(name), "raid/%s", vol->v_name); if (g_raid_name_format == 0 || vol->v_name[0] == 0 || g_provider_by_name(name) != NULL) { /* Otherwise use sequential volume number. 
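 */

/*
 * [Editor's aside -- illustrative sketch only, not part of this commit;
 * it refers back to the idle-timeout conversion in g_raid_worker()
 * above.]  A one-step "usec * hz / 1000000" can overflow a 32-bit int
 * once the product passes INT_MAX (about 2.1 s of timeout at hz = 1000),
 * so the worker divides by the microseconds-per-tick value first and
 * rounds up by hand, accepting an error of less than one tick.
 */
#if 0	/* editor's sketch; never compiled as part of g_raid */
#include <stdio.h>

int
main(void)
{
	int hz = 1000;
	int timeout_us = 5000000;		/* 5 s idle threshold */
	int micpertic = 1000000 / hz;		/* 1000 us per tick */
	int ticks = (timeout_us + micpertic - 1) / micpertic;

	/* 5000 ticks, computed without any intermediate overflow. */
	printf("%d\n", ticks);
	return (0);
}
#endif

/*
 * (End of editor's aside; otherwise fall back to a sequential volume
 * number.)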
*/ snprintf(name, sizeof(name), "raid/r%d", vol->v_global_id); } pp = g_new_providerf(sc->sc_geom, "%s", name); pp->flags |= G_PF_DIRECT_RECEIVE; if (vol->v_tr->tro_class->trc_accept_unmapped) { pp->flags |= G_PF_ACCEPT_UNMAPPED; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state == G_RAID_SUBDISK_S_NONE) continue; if ((sd->sd_disk->d_consumer->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0) pp->flags &= ~G_PF_ACCEPT_UNMAPPED; } } pp->private = vol; pp->mediasize = vol->v_mediasize; pp->sectorsize = vol->v_sectorsize; pp->stripesize = 0; pp->stripeoffset = 0; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 || vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE || vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT) { if ((disk = vol->v_subdisks[0].sd_disk) != NULL && disk->d_consumer != NULL && disk->d_consumer->provider != NULL) { pp->stripesize = disk->d_consumer->provider->stripesize; off = disk->d_consumer->provider->stripeoffset; pp->stripeoffset = off + vol->v_subdisks[0].sd_offset; if (off > 0) pp->stripeoffset %= off; } if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3) { pp->stripesize *= (vol->v_disks_count - 1); pp->stripeoffset *= (vol->v_disks_count - 1); } } else pp->stripesize = vol->v_strip_size; vol->v_provider = pp; g_error_provider(pp, 0); g_topology_unlock(); G_RAID_DEBUG1(0, sc, "Provider %s for volume %s created.", pp->name, vol->v_name); } static void g_raid_destroy_provider(struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_provider *pp; struct bio *bp, *tmp; g_topology_assert_not(); sc = vol->v_softc; pp = vol->v_provider; KASSERT(pp != NULL, ("NULL provider (volume=%s).", vol->v_name)); g_topology_lock(); g_error_provider(pp, ENXIO); mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH_SAFE(bp, &sc->sc_queue.queue, bio_queue, tmp) { if (bp->bio_to != pp) continue; bioq_remove(&sc->sc_queue, bp); g_io_deliver(bp, ENXIO); } mtx_unlock(&sc->sc_queue_mtx); G_RAID_DEBUG1(0, sc, "Provider %s for volume %s destroyed.", pp->name, vol->v_name); g_wither_provider(pp, ENXIO); g_topology_unlock(); vol->v_provider = NULL; } /* * Update device state. */ static int g_raid_update_volume(struct g_raid_volume *vol, u_int event) { struct g_raid_softc *sc; sc = vol->v_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(2, sc, "Event %s for volume %s.", g_raid_volume_event2str(event), vol->v_name); switch (event) { case G_RAID_VOLUME_E_DOWN: if (vol->v_provider != NULL) g_raid_destroy_provider(vol); break; case G_RAID_VOLUME_E_UP: if (vol->v_provider == NULL) g_raid_launch_provider(vol); break; case G_RAID_VOLUME_E_START: if (vol->v_tr) G_RAID_TR_START(vol->v_tr); return (0); default: if (sc->sc_md) G_RAID_MD_VOLUME_EVENT(sc->sc_md, vol, event); return (0); } /* Manage root mount release. */ if (vol->v_starting) { vol->v_starting = 0; G_RAID_DEBUG1(1, sc, "root_mount_rel %p", vol->v_rootmount); root_mount_rel(vol->v_rootmount); vol->v_rootmount = NULL; } if (vol->v_stopping && vol->v_provider_open == 0) g_raid_destroy_volume(vol); return (0); } /* * Update subdisk state. */ static int g_raid_update_subdisk(struct g_raid_subdisk *sd, u_int event) { struct g_raid_softc *sc; struct g_raid_volume *vol; sc = sd->sd_softc; vol = sd->sd_volume; sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(2, sc, "Event %s for subdisk %s:%d-%s.", g_raid_subdisk_event2str(event), vol->v_name, sd->sd_pos, sd->sd_disk ? 
g_raid_get_diskname(sd->sd_disk) : "[none]"); if (vol->v_tr) G_RAID_TR_EVENT(vol->v_tr, sd, event); return (0); } /* * Update disk state. */ static int g_raid_update_disk(struct g_raid_disk *disk, u_int event) { struct g_raid_softc *sc; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(2, sc, "Event %s for disk %s.", g_raid_disk_event2str(event), g_raid_get_diskname(disk)); if (sc->sc_md) G_RAID_MD_EVENT(sc->sc_md, disk, event); return (0); } /* * Node event. */ static int g_raid_update_node(struct g_raid_softc *sc, u_int event) { sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(2, sc, "Event %s for the array.", g_raid_node_event2str(event)); if (event == G_RAID_NODE_E_WAKE) return (0); if (sc->sc_md) G_RAID_MD_EVENT(sc->sc_md, NULL, event); return (0); } static int g_raid_access(struct g_provider *pp, int acr, int acw, int ace) { struct g_raid_volume *vol; struct g_raid_softc *sc; int dcw, opens, error = 0; g_topology_assert(); sc = pp->geom->softc; vol = pp->private; KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); KASSERT(vol != NULL, ("NULL volume (provider=%s).", pp->name)); G_RAID_DEBUG1(2, sc, "Access request for %s: r%dw%de%d.", pp->name, acr, acw, ace); dcw = pp->acw + acw; g_topology_unlock(); sx_xlock(&sc->sc_lock); /* Deny new opens while dying. */ if (sc->sc_stopping != 0 && (acr > 0 || acw > 0 || ace > 0)) { error = ENXIO; goto out; } /* Deny write opens for read-only volumes. */ if (vol->v_read_only && acw > 0) { error = EROFS; goto out; } if (dcw == 0) g_raid_clean(vol, dcw); vol->v_provider_open += acr + acw + ace; /* Handle delayed node destruction. */ if (sc->sc_stopping == G_RAID_DESTROY_DELAYED && vol->v_provider_open == 0) { /* Count open volumes. */ opens = g_raid_nopens(sc); if (opens == 0) { sc->sc_stopping = G_RAID_DESTROY_HARD; /* Wake up worker to make it selfdestruct. */ g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); } } /* Handle open volume destruction. 
*/ if (vol->v_stopping && vol->v_provider_open == 0) g_raid_destroy_volume(vol); out: sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } struct g_raid_softc * g_raid_create_node(struct g_class *mp, const char *name, struct g_raid_md_object *md) { struct g_raid_softc *sc; struct g_geom *gp; int error; g_topology_assert(); G_RAID_DEBUG(1, "Creating array %s.", name); gp = g_new_geomf(mp, "%s", name); sc = malloc(sizeof(*sc), M_RAID, M_WAITOK | M_ZERO); gp->start = g_raid_start; gp->orphan = g_raid_orphan; gp->access = g_raid_access; gp->dumpconf = g_raid_dumpconf; sc->sc_md = md; sc->sc_geom = gp; sc->sc_flags = 0; TAILQ_INIT(&sc->sc_volumes); TAILQ_INIT(&sc->sc_disks); sx_init(&sc->sc_lock, "graid:lock"); mtx_init(&sc->sc_queue_mtx, "graid:queue", NULL, MTX_DEF); TAILQ_INIT(&sc->sc_events); bioq_init(&sc->sc_queue); gp->softc = sc; error = kproc_create(g_raid_worker, sc, &sc->sc_worker, 0, 0, "g_raid %s", name); if (error != 0) { G_RAID_DEBUG(0, "Cannot create kernel thread for %s.", name); mtx_destroy(&sc->sc_queue_mtx); sx_destroy(&sc->sc_lock); g_destroy_geom(sc->sc_geom); free(sc, M_RAID); return (NULL); } G_RAID_DEBUG1(0, sc, "Array %s created.", name); return (sc); } struct g_raid_volume * g_raid_create_volume(struct g_raid_softc *sc, const char *name, int id) { struct g_raid_volume *vol, *vol1; int i; G_RAID_DEBUG1(1, sc, "Creating volume %s.", name); vol = malloc(sizeof(*vol), M_RAID, M_WAITOK | M_ZERO); vol->v_softc = sc; strlcpy(vol->v_name, name, G_RAID_MAX_VOLUMENAME); vol->v_state = G_RAID_VOLUME_S_STARTING; vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_UNKNOWN; vol->v_rotate_parity = 1; bioq_init(&vol->v_inflight); bioq_init(&vol->v_locked); LIST_INIT(&vol->v_locks); for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) { vol->v_subdisks[i].sd_softc = sc; vol->v_subdisks[i].sd_volume = vol; vol->v_subdisks[i].sd_pos = i; vol->v_subdisks[i].sd_state = G_RAID_DISK_S_NONE; } /* Find free ID for this volume. */ g_topology_lock(); vol1 = vol; if (id >= 0) { LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) { if (vol1->v_global_id == id) break; } } if (vol1 != NULL) { for (id = 0; ; id++) { LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) { if (vol1->v_global_id == id) break; } if (vol1 == NULL) break; } } vol->v_global_id = id; LIST_INSERT_HEAD(&g_raid_volumes, vol, v_global_next); g_topology_unlock(); /* Delay root mounting. 
*/ vol->v_rootmount = root_mount_hold("GRAID"); G_RAID_DEBUG1(1, sc, "root_mount_hold %p", vol->v_rootmount); vol->v_starting = 1; TAILQ_INSERT_TAIL(&sc->sc_volumes, vol, v_next); return (vol); } struct g_raid_disk * g_raid_create_disk(struct g_raid_softc *sc) { struct g_raid_disk *disk; G_RAID_DEBUG1(1, sc, "Creating disk."); disk = malloc(sizeof(*disk), M_RAID, M_WAITOK | M_ZERO); disk->d_softc = sc; disk->d_state = G_RAID_DISK_S_NONE; TAILQ_INIT(&disk->d_subdisks); TAILQ_INSERT_TAIL(&sc->sc_disks, disk, d_next); return (disk); } int g_raid_start_volume(struct g_raid_volume *vol) { struct g_raid_tr_class *class; struct g_raid_tr_object *obj; int status; G_RAID_DEBUG1(2, vol->v_softc, "Starting volume %s.", vol->v_name); LIST_FOREACH(class, &g_raid_tr_classes, trc_list) { if (!class->trc_enable) continue; G_RAID_DEBUG1(2, vol->v_softc, "Tasting volume %s for %s transformation.", vol->v_name, class->name); obj = (void *)kobj_create((kobj_class_t)class, M_RAID, M_WAITOK); obj->tro_class = class; obj->tro_volume = vol; status = G_RAID_TR_TASTE(obj, vol); if (status != G_RAID_TR_TASTE_FAIL) break; kobj_delete((kobj_t)obj, M_RAID); } if (class == NULL) { G_RAID_DEBUG1(0, vol->v_softc, "No transformation module found for %s.", vol->v_name); vol->v_tr = NULL; g_raid_change_volume_state(vol, G_RAID_VOLUME_S_UNSUPPORTED); g_raid_event_send(vol, G_RAID_VOLUME_E_DOWN, G_RAID_EVENT_VOLUME); return (-1); } G_RAID_DEBUG1(2, vol->v_softc, "Transformation module %s chosen for %s.", class->name, vol->v_name); vol->v_tr = obj; return (0); } int g_raid_destroy_node(struct g_raid_softc *sc, int worker) { struct g_raid_volume *vol, *tmpv; struct g_raid_disk *disk, *tmpd; int error = 0; sc->sc_stopping = G_RAID_DESTROY_HARD; TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tmpv) { if (g_raid_destroy_volume(vol)) error = EBUSY; } if (error) return (error); TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tmpd) { if (g_raid_destroy_disk(disk)) error = EBUSY; } if (error) return (error); if (sc->sc_md) { G_RAID_MD_FREE(sc->sc_md); kobj_delete((kobj_t)sc->sc_md, M_RAID); sc->sc_md = NULL; } if (sc->sc_geom != NULL) { G_RAID_DEBUG1(0, sc, "Array %s destroyed.", sc->sc_name); g_topology_lock(); sc->sc_geom->softc = NULL; g_wither_geom(sc->sc_geom, ENXIO); g_topology_unlock(); sc->sc_geom = NULL; } else G_RAID_DEBUG(1, "Array destroyed."); if (worker) { g_raid_event_cancel(sc, sc); mtx_destroy(&sc->sc_queue_mtx); sx_xunlock(&sc->sc_lock); sx_destroy(&sc->sc_lock); wakeup(&sc->sc_stopping); free(sc, M_RAID); curthread->td_pflags &= ~TDP_GEOM; G_RAID_DEBUG(1, "Thread exiting."); kproc_exit(0); } else { /* Wake up worker to make it selfdestruct. 
*/ g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); } return (0); } int g_raid_destroy_volume(struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_raid_disk *disk; int i; sc = vol->v_softc; G_RAID_DEBUG1(2, sc, "Destroying volume %s.", vol->v_name); vol->v_stopping = 1; if (vol->v_state != G_RAID_VOLUME_S_STOPPED) { if (vol->v_tr) { G_RAID_TR_STOP(vol->v_tr); return (EBUSY); } else vol->v_state = G_RAID_VOLUME_S_STOPPED; } if (g_raid_event_check(sc, vol) != 0) return (EBUSY); if (vol->v_provider != NULL) return (EBUSY); if (vol->v_provider_open != 0) return (EBUSY); if (vol->v_tr) { G_RAID_TR_FREE(vol->v_tr); kobj_delete((kobj_t)vol->v_tr, M_RAID); vol->v_tr = NULL; } if (vol->v_rootmount) root_mount_rel(vol->v_rootmount); g_topology_lock(); LIST_REMOVE(vol, v_global_next); g_topology_unlock(); TAILQ_REMOVE(&sc->sc_volumes, vol, v_next); for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) { g_raid_event_cancel(sc, &vol->v_subdisks[i]); disk = vol->v_subdisks[i].sd_disk; if (disk == NULL) continue; TAILQ_REMOVE(&disk->d_subdisks, &vol->v_subdisks[i], sd_next); } G_RAID_DEBUG1(2, sc, "Volume %s destroyed.", vol->v_name); if (sc->sc_md) G_RAID_MD_FREE_VOLUME(sc->sc_md, vol); g_raid_event_cancel(sc, vol); free(vol, M_RAID); if (sc->sc_stopping == G_RAID_DESTROY_HARD) { /* Wake up worker to let it selfdestruct. */ g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); } return (0); } int g_raid_destroy_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *tmp; sc = disk->d_softc; G_RAID_DEBUG1(2, sc, "Destroying disk."); if (disk->d_consumer) { g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; } TAILQ_FOREACH_SAFE(sd, &disk->d_subdisks, sd_next, tmp) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); TAILQ_REMOVE(&disk->d_subdisks, sd, sd_next); sd->sd_disk = NULL; } TAILQ_REMOVE(&sc->sc_disks, disk, d_next); if (sc->sc_md) G_RAID_MD_FREE_DISK(sc->sc_md, disk); g_raid_event_cancel(sc, disk); free(disk, M_RAID); return (0); } int g_raid_destroy(struct g_raid_softc *sc, int how) { int error, opens; g_topology_assert_not(); if (sc == NULL) return (ENXIO); sx_assert(&sc->sc_lock, SX_XLOCKED); /* Count open volumes. */ opens = g_raid_nopens(sc); /* React on some opened volumes. */ if (opens > 0) { switch (how) { case G_RAID_DESTROY_SOFT: G_RAID_DEBUG1(1, sc, "%d volumes are still open.", opens); sx_xunlock(&sc->sc_lock); return (EBUSY); case G_RAID_DESTROY_DELAYED: G_RAID_DEBUG1(1, sc, "Array will be destroyed on last close."); sc->sc_stopping = G_RAID_DESTROY_DELAYED; sx_xunlock(&sc->sc_lock); return (EBUSY); case G_RAID_DESTROY_HARD: G_RAID_DEBUG1(1, sc, "%d volumes are still open.", opens); } } /* Mark node for destruction. */ sc->sc_stopping = G_RAID_DESTROY_HARD; /* Wake up worker to let it selfdestruct. */ g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); /* Sleep until node destroyed. */ error = sx_sleep(&sc->sc_stopping, &sc->sc_lock, PRIBIO | PDROP, "r:destroy", hz * 3); return (error == EWOULDBLOCK ? 
EBUSY : 0); } static void g_raid_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_consumer *cp; struct g_geom *gp, *geom; struct g_raid_md_class *class; struct g_raid_md_object *obj; int status; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); if (!g_raid_enable) return (NULL); G_RAID_DEBUG(2, "Tasting provider %s.", pp->name); geom = NULL; status = G_RAID_MD_TASTE_FAIL; gp = g_new_geomf(mp, "raid:taste"); /* * This orphan function should be never called. */ gp->orphan = g_raid_taste_orphan; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_RECEIVE; g_attach(cp, pp); if (g_access(cp, 1, 0, 0) != 0) goto ofail; LIST_FOREACH(class, &g_raid_md_classes, mdc_list) { if (!class->mdc_enable) continue; G_RAID_DEBUG(2, "Tasting provider %s for %s metadata.", pp->name, class->name); obj = (void *)kobj_create((kobj_class_t)class, M_RAID, M_WAITOK); obj->mdo_class = class; status = G_RAID_MD_TASTE(obj, mp, cp, &geom); if (status != G_RAID_MD_TASTE_NEW) kobj_delete((kobj_t)obj, M_RAID); if (status != G_RAID_MD_TASTE_FAIL) break; } if (status == G_RAID_MD_TASTE_FAIL) (void)g_access(cp, -1, 0, 0); ofail: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); G_RAID_DEBUG(2, "Tasting provider %s done.", pp->name); return (geom); } int g_raid_create_node_format(const char *format, struct gctl_req *req, struct g_geom **gp) { struct g_raid_md_class *class; struct g_raid_md_object *obj; int status; G_RAID_DEBUG(2, "Creating array for %s metadata.", format); LIST_FOREACH(class, &g_raid_md_classes, mdc_list) { if (strcasecmp(class->name, format) == 0) break; } if (class == NULL) { G_RAID_DEBUG(1, "No support for %s metadata.", format); return (G_RAID_MD_TASTE_FAIL); } obj = (void *)kobj_create((kobj_class_t)class, M_RAID, M_WAITOK); obj->mdo_class = class; status = G_RAID_MD_CREATE_REQ(obj, &g_raid_class, req, gp); if (status != G_RAID_MD_TASTE_NEW) kobj_delete((kobj_t)obj, M_RAID); return (status); } static int g_raid_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_raid_softc *sc; int error; g_topology_unlock(); sc = gp->softc; sx_xlock(&sc->sc_lock); g_cancel_event(sc); error = g_raid_destroy(gp->softc, G_RAID_DESTROY_SOFT); g_topology_lock(); return (error); } void g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol, struct g_raid_subdisk *sd, struct g_raid_disk *disk) { if (sc->sc_stopping == G_RAID_DESTROY_HARD) return; if (sc->sc_md) G_RAID_MD_WRITE(sc->sc_md, vol, sd, disk); } void g_raid_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd, struct g_raid_disk *disk) { if (disk == NULL) disk = sd->sd_disk; if (disk == NULL) { G_RAID_DEBUG1(0, sc, "Warning! Fail request to an absent disk!"); return; } if (disk->d_state != G_RAID_DISK_S_ACTIVE) { G_RAID_DEBUG1(0, sc, "Warning! 
Fail request to a disk in a " "wrong state (%s)!", g_raid_disk_state2str(disk->d_state)); return; } if (sc->sc_md) G_RAID_MD_FAIL_DISK(sc->sc_md, sd, disk); } static void g_raid_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; int i, s; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { vol = pp->private; g_topology_unlock(); sx_xlock(&sc->sc_lock); sbuf_printf(sb, "%s%s %s volume\n", indent, sc->sc_md->mdo_class->name, g_raid_volume_level2str(vol->v_raid_level, vol->v_raid_level_qualifier)); sbuf_printf(sb, "%s\n", indent, vol->v_name); sbuf_printf(sb, "%s%s\n", indent, g_raid_volume_level2str(vol->v_raid_level, vol->v_raid_level_qualifier)); sbuf_printf(sb, "%s%s\n", indent, vol->v_tr ? vol->v_tr->tro_class->name : "NONE"); sbuf_printf(sb, "%s%u\n", indent, vol->v_disks_count); sbuf_printf(sb, "%s%u\n", indent, vol->v_strip_size); sbuf_printf(sb, "%s%s\n", indent, g_raid_volume_state2str(vol->v_state)); sbuf_printf(sb, "%s%s\n", indent, vol->v_dirty ? "Yes" : "No"); sbuf_printf(sb, "%s", indent); for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_disk != NULL && sd->sd_disk->d_consumer != NULL) { sbuf_printf(sb, "%s ", g_raid_get_diskname(sd->sd_disk)); } else { sbuf_cat(sb, "NONE "); } sbuf_printf(sb, "(%s", g_raid_subdisk_state2str(sd->sd_state)); if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { sbuf_printf(sb, " %d%%", (int)(sd->sd_rebuild_pos * 100 / sd->sd_size)); } sbuf_cat(sb, ")"); if (i + 1 < vol->v_disks_count) sbuf_cat(sb, ", "); } sbuf_cat(sb, "\n"); sx_xunlock(&sc->sc_lock); g_topology_lock(); } else if (cp != NULL) { disk = cp->private; if (disk == NULL) return; g_topology_unlock(); sx_xlock(&sc->sc_lock); sbuf_printf(sb, "%s%s", indent, g_raid_disk_state2str(disk->d_state)); if (!TAILQ_EMPTY(&disk->d_subdisks)) { sbuf_cat(sb, " ("); TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { sbuf_printf(sb, "%s", g_raid_subdisk_state2str(sd->sd_state)); if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { sbuf_printf(sb, " %d%%", (int)(sd->sd_rebuild_pos * 100 / sd->sd_size)); } if (TAILQ_NEXT(sd, sd_next)) sbuf_cat(sb, ", "); } sbuf_cat(sb, ")"); } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s", indent); TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { sbuf_printf(sb, "r%d(%s):%d@%ju", sd->sd_volume->v_global_id, sd->sd_volume->v_name, sd->sd_pos, (uintmax_t)sd->sd_offset); if (TAILQ_NEXT(sd, sd_next)) sbuf_cat(sb, ", "); } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s%d\n", indent, disk->d_read_errs); sx_xunlock(&sc->sc_lock); g_topology_lock(); } else { g_topology_unlock(); sx_xlock(&sc->sc_lock); if (sc->sc_md) { sbuf_printf(sb, "%s%s\n", indent, sc->sc_md->mdo_class->name); } if (!TAILQ_EMPTY(&sc->sc_volumes)) { s = 0xff; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (vol->v_state < s) s = vol->v_state; } sbuf_printf(sb, "%s%s\n", indent, g_raid_volume_state2str(s)); } sx_xunlock(&sc->sc_lock); g_topology_lock(); } } static void g_raid_shutdown_post_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_raid_softc *sc; struct g_raid_volume *vol; mp = arg; g_topology_lock(); g_raid_shutdown = 1; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if ((sc = gp->softc) == NULL) continue; g_topology_unlock(); sx_xlock(&sc->sc_lock); TAILQ_FOREACH(vol, &sc->sc_volumes, 
v_next) g_raid_clean(vol, -1); g_cancel_event(sc); g_raid_destroy(sc, G_RAID_DESTROY_DELAYED); g_topology_lock(); } g_topology_unlock(); } static void g_raid_init(struct g_class *mp) { g_raid_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, g_raid_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); if (g_raid_post_sync == NULL) G_RAID_DEBUG(0, "Warning! Cannot register shutdown event."); g_raid_started = 1; } static void g_raid_fini(struct g_class *mp) { if (g_raid_post_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid_post_sync); g_raid_started = 0; } int g_raid_md_modevent(module_t mod, int type, void *arg) { struct g_raid_md_class *class, *c, *nc; int error; error = 0; class = arg; switch (type) { case MOD_LOAD: c = LIST_FIRST(&g_raid_md_classes); if (c == NULL || c->mdc_priority > class->mdc_priority) LIST_INSERT_HEAD(&g_raid_md_classes, class, mdc_list); else { while ((nc = LIST_NEXT(c, mdc_list)) != NULL && nc->mdc_priority < class->mdc_priority) c = nc; LIST_INSERT_AFTER(c, class, mdc_list); } if (g_raid_started) g_retaste(&g_raid_class); break; case MOD_UNLOAD: LIST_REMOVE(class, mdc_list); break; default: error = EOPNOTSUPP; break; } return (error); } int g_raid_tr_modevent(module_t mod, int type, void *arg) { struct g_raid_tr_class *class, *c, *nc; int error; error = 0; class = arg; switch (type) { case MOD_LOAD: c = LIST_FIRST(&g_raid_tr_classes); if (c == NULL || c->trc_priority > class->trc_priority) LIST_INSERT_HEAD(&g_raid_tr_classes, class, trc_list); else { while ((nc = LIST_NEXT(c, trc_list)) != NULL && nc->trc_priority < class->trc_priority) c = nc; LIST_INSERT_AFTER(c, class, trc_list); } break; case MOD_UNLOAD: LIST_REMOVE(class, trc_list); break; default: error = EOPNOTSUPP; break; } return (error); } /* * Use local implementation of DECLARE_GEOM_CLASS(g_raid_class, g_raid) * to reduce module priority, allowing submodules to register them first. */ static moduledata_t g_raid_mod = { "g_raid", g_modevent, &g_raid_class }; DECLARE_MODULE(g_raid, g_raid_mod, SI_SUB_DRIVERS, SI_ORDER_THIRD); MODULE_VERSION(geom_raid, 0); Index: head/sys/geom/raid/g_raid.h =================================================================== --- head/sys/geom/raid/g_raid.h (revision 350693) +++ head/sys/geom/raid/g_raid.h (revision 350694) @@ -1,471 +1,445 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_RAID_H_ #define _G_RAID_H_ #include #include #include #include #ifdef _KERNEL #include #endif #define G_RAID_CLASS_NAME "RAID" #define G_RAID_MAGIC "GEOM::RAID" #define G_RAID_VERSION 0 struct g_raid_md_object; struct g_raid_tr_object; #define G_RAID_DEVICE_FLAG_NOAUTOSYNC 0x0000000000000001ULL #define G_RAID_DEVICE_FLAG_NOFAILSYNC 0x0000000000000002ULL #define G_RAID_DEVICE_FLAG_MASK (G_RAID_DEVICE_FLAG_NOAUTOSYNC | \ G_RAID_DEVICE_FLAG_NOFAILSYNC) #ifdef _KERNEL extern u_int g_raid_aggressive_spare; extern u_int g_raid_debug; extern int g_raid_enable; extern int g_raid_read_err_thresh; extern u_int g_raid_start_timeout; extern struct g_class g_raid_class; -#define G_RAID_DEBUG(lvl, fmt, ...) do { \ - if (g_raid_debug >= (lvl)) { \ - if (g_raid_debug > 0) { \ - printf("GEOM_RAID[%u]: " fmt "\n", \ - lvl, ## __VA_ARGS__); \ - } else { \ - printf("GEOM_RAID: " fmt "\n", \ - ## __VA_ARGS__); \ - } \ - } \ -} while (0) -#define G_RAID_DEBUG1(lvl, sc, fmt, ...) do { \ - if (g_raid_debug >= (lvl)) { \ - if (g_raid_debug > 0) { \ - printf("GEOM_RAID[%u]: %s: " fmt "\n", \ - lvl, (sc)->sc_name, ## __VA_ARGS__); \ - } else { \ - printf("GEOM_RAID: %s: " fmt "\n", \ - (sc)->sc_name, ## __VA_ARGS__); \ - } \ - } \ -} while (0) -#define G_RAID_LOGREQ(lvl, bp, fmt, ...) do { \ - if (g_raid_debug >= (lvl)) { \ - if (g_raid_debug > 0) { \ - printf("GEOM_RAID[%u]: " fmt " ", \ - lvl, ## __VA_ARGS__); \ - } else \ - printf("GEOM_RAID: " fmt " ", ## __VA_ARGS__); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_RAID_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_RAID", g_raid_debug, (lvl), NULL, __VA_ARGS__) +#define G_RAID_DEBUG1(lvl, sc, fmt, ...) \ + _GEOM_DEBUG("GEOM_RAID", g_raid_debug, (lvl), NULL, "%s: " fmt, \ + (sc)->sc_name, ## __VA_ARGS__) +#define G_RAID_LOGREQ(lvl, bp, ...) \ + _GEOM_DEBUG("GEOM_RAID", g_raid_debug, (lvl), (bp), __VA_ARGS__) /* * Flags we use to distinguish I/O initiated by the TR layer to maintain * the volume's characteristics, fix subdisks, extra copies of data, etc. * * G_RAID_BIO_FLAG_SYNC I/O to update an extra copy of the data * for RAID volumes that maintain extra data * and need to rebuild that data. * G_RAID_BIO_FLAG_REMAP I/O done to try to provoke a subdisk into * doing some desirable action such as bad * block remapping after we detect a bad part * of the disk. * G_RAID_BIO_FLAG_LOCKED I/O holds range lock that should re released. * * and the following meta item: * G_RAID_BIO_FLAG_SPECIAL And of the I/O flags that need to make it * through the range locking which would * otherwise defer the I/O until after that * range is unlocked. 
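 */

/*
 * [Editor's aside -- a hedged, hypothetical example, not part of this
 * commit.]  The flags defined below travel in bp->bio_cflags.  A
 * transformation module tags its own rebuild/recovery clones so that the
 * range-lock check in g_raid_start_request() does not defer them
 * (G_RAID_BIO_FLAG_SPECIAL is simply the mask of such flags).  The
 * helper name is invented for the example.
 */
#if 0	/* editor's sketch; never compiled as part of g_raid */
static void
demo_tag_recovery_bio(struct bio *cbp)
{

	/*
	 * Mark this clone as TR-internal recovery I/O; bios carrying
	 * any flag in G_RAID_BIO_FLAG_SPECIAL bypass the locked-range
	 * deferral (see g_raid_start_request()).
	 */
	cbp->bio_cflags |= G_RAID_BIO_FLAG_SYNC;
}
#endif

/*
 * (End of editor's aside; the flag definitions follow.)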
*/ #define G_RAID_BIO_FLAG_SYNC 0x01 #define G_RAID_BIO_FLAG_REMAP 0x02 #define G_RAID_BIO_FLAG_SPECIAL \ (G_RAID_BIO_FLAG_SYNC|G_RAID_BIO_FLAG_REMAP) #define G_RAID_BIO_FLAG_LOCKED 0x80 struct g_raid_lock { off_t l_offset; off_t l_length; void *l_callback_arg; int l_pending; LIST_ENTRY(g_raid_lock) l_next; }; #define G_RAID_EVENT_WAIT 0x01 #define G_RAID_EVENT_VOLUME 0x02 #define G_RAID_EVENT_SUBDISK 0x04 #define G_RAID_EVENT_DISK 0x08 #define G_RAID_EVENT_DONE 0x10 struct g_raid_event { void *e_tgt; int e_event; int e_flags; int e_error; TAILQ_ENTRY(g_raid_event) e_next; }; #define G_RAID_DISK_S_NONE 0x00 /* State is unknown. */ #define G_RAID_DISK_S_OFFLINE 0x01 /* Missing disk placeholder. */ #define G_RAID_DISK_S_DISABLED 0x02 /* Disabled. */ #define G_RAID_DISK_S_FAILED 0x03 /* Failed. */ #define G_RAID_DISK_S_STALE_FAILED 0x04 /* Old failed. */ #define G_RAID_DISK_S_SPARE 0x05 /* Hot-spare. */ #define G_RAID_DISK_S_STALE 0x06 /* Old disk, unused now. */ #define G_RAID_DISK_S_ACTIVE 0x07 /* Operational. */ #define G_RAID_DISK_E_DISCONNECTED 0x01 struct g_raid_disk { struct g_raid_softc *d_softc; /* Back-pointer to softc. */ struct g_consumer *d_consumer; /* GEOM disk consumer. */ void *d_md_data; /* Disk's metadata storage. */ int d_candelete; /* BIO_DELETE supported. */ uint64_t d_flags; /* Additional flags. */ u_int d_state; /* Disk state. */ u_int d_load; /* Disk average load. */ off_t d_last_offset; /* Last head offset. */ int d_read_errs; /* Count of the read errors */ TAILQ_HEAD(, g_raid_subdisk) d_subdisks; /* List of subdisks. */ TAILQ_ENTRY(g_raid_disk) d_next; /* Next disk in the node. */ struct g_kerneldump d_kd; /* Kernel dumping method/args. */ }; #define G_RAID_SUBDISK_S_NONE 0x00 /* Absent. */ #define G_RAID_SUBDISK_S_FAILED 0x01 /* Failed. */ #define G_RAID_SUBDISK_S_NEW 0x02 /* Blank. */ #define G_RAID_SUBDISK_S_REBUILD 0x03 /* Blank + rebuild. */ #define G_RAID_SUBDISK_S_UNINITIALIZED 0x04 /* Disk of the new volume. */ #define G_RAID_SUBDISK_S_STALE 0x05 /* Dirty. */ #define G_RAID_SUBDISK_S_RESYNC 0x06 /* Dirty + check/repair. */ #define G_RAID_SUBDISK_S_ACTIVE 0x07 /* Usable. */ #define G_RAID_SUBDISK_E_NEW 0x01 /* A new subdisk has arrived */ #define G_RAID_SUBDISK_E_FAILED 0x02 /* A subdisk failed, but remains in volume */ #define G_RAID_SUBDISK_E_DISCONNECTED 0x03 /* A subdisk removed from volume. */ #define G_RAID_SUBDISK_E_FIRST_TR_PRIVATE 0x80 /* translation private events */ #define G_RAID_SUBDISK_POS(sd) \ ((sd)->sd_disk ? ((sd)->sd_disk->d_last_offset - (sd)->sd_offset) : 0) #define G_RAID_SUBDISK_TRACK_SIZE (1 * 1024 * 1024) #define G_RAID_SUBDISK_LOAD(sd) \ ((sd)->sd_disk ? ((sd)->sd_disk->d_load) : 0) #define G_RAID_SUBDISK_LOAD_SCALE 256 struct g_raid_subdisk { struct g_raid_softc *sd_softc; /* Back-pointer to softc. */ struct g_raid_disk *sd_disk; /* Where this subdisk lives. */ struct g_raid_volume *sd_volume; /* Volume, sd is a part of. */ off_t sd_offset; /* Offset on the disk. */ off_t sd_size; /* Size on the disk. */ u_int sd_pos; /* Position in volume. */ u_int sd_state; /* Subdisk state. */ off_t sd_rebuild_pos; /* Rebuild position. */ int sd_recovery; /* Count of recovery reqs. */ TAILQ_ENTRY(g_raid_subdisk) sd_next; /* Next subdisk on disk. 
*/ }; #define G_RAID_MAX_SUBDISKS 16 #define G_RAID_MAX_VOLUMENAME 32 #define G_RAID_VOLUME_S_STARTING 0x00 #define G_RAID_VOLUME_S_BROKEN 0x01 #define G_RAID_VOLUME_S_DEGRADED 0x02 #define G_RAID_VOLUME_S_SUBOPTIMAL 0x03 #define G_RAID_VOLUME_S_OPTIMAL 0x04 #define G_RAID_VOLUME_S_UNSUPPORTED 0x05 #define G_RAID_VOLUME_S_STOPPED 0x06 #define G_RAID_VOLUME_S_ALIVE(s) \ ((s) == G_RAID_VOLUME_S_DEGRADED || \ (s) == G_RAID_VOLUME_S_SUBOPTIMAL || \ (s) == G_RAID_VOLUME_S_OPTIMAL) #define G_RAID_VOLUME_E_DOWN 0x00 #define G_RAID_VOLUME_E_UP 0x01 #define G_RAID_VOLUME_E_START 0x10 #define G_RAID_VOLUME_E_STARTMD 0x11 #define G_RAID_VOLUME_RL_RAID0 0x00 #define G_RAID_VOLUME_RL_RAID1 0x01 #define G_RAID_VOLUME_RL_RAID3 0x03 #define G_RAID_VOLUME_RL_RAID4 0x04 #define G_RAID_VOLUME_RL_RAID5 0x05 #define G_RAID_VOLUME_RL_RAID6 0x06 #define G_RAID_VOLUME_RL_RAIDMDF 0x07 #define G_RAID_VOLUME_RL_RAID1E 0x11 #define G_RAID_VOLUME_RL_SINGLE 0x0f #define G_RAID_VOLUME_RL_CONCAT 0x1f #define G_RAID_VOLUME_RL_RAID5E 0x15 #define G_RAID_VOLUME_RL_RAID5EE 0x25 #define G_RAID_VOLUME_RL_RAID5R 0x35 #define G_RAID_VOLUME_RL_UNKNOWN 0xff #define G_RAID_VOLUME_RLQ_NONE 0x00 #define G_RAID_VOLUME_RLQ_R1SM 0x00 #define G_RAID_VOLUME_RLQ_R1MM 0x01 #define G_RAID_VOLUME_RLQ_R3P0 0x00 #define G_RAID_VOLUME_RLQ_R3PN 0x01 #define G_RAID_VOLUME_RLQ_R4P0 0x00 #define G_RAID_VOLUME_RLQ_R4PN 0x01 #define G_RAID_VOLUME_RLQ_R5RA 0x00 #define G_RAID_VOLUME_RLQ_R5RS 0x01 #define G_RAID_VOLUME_RLQ_R5LA 0x02 #define G_RAID_VOLUME_RLQ_R5LS 0x03 #define G_RAID_VOLUME_RLQ_R6RA 0x00 #define G_RAID_VOLUME_RLQ_R6RS 0x01 #define G_RAID_VOLUME_RLQ_R6LA 0x02 #define G_RAID_VOLUME_RLQ_R6LS 0x03 #define G_RAID_VOLUME_RLQ_RMDFRA 0x00 #define G_RAID_VOLUME_RLQ_RMDFRS 0x01 #define G_RAID_VOLUME_RLQ_RMDFLA 0x02 #define G_RAID_VOLUME_RLQ_RMDFLS 0x03 #define G_RAID_VOLUME_RLQ_R1EA 0x00 #define G_RAID_VOLUME_RLQ_R1EO 0x01 #define G_RAID_VOLUME_RLQ_R5ERA 0x00 #define G_RAID_VOLUME_RLQ_R5ERS 0x01 #define G_RAID_VOLUME_RLQ_R5ELA 0x02 #define G_RAID_VOLUME_RLQ_R5ELS 0x03 #define G_RAID_VOLUME_RLQ_R5EERA 0x00 #define G_RAID_VOLUME_RLQ_R5EERS 0x01 #define G_RAID_VOLUME_RLQ_R5EELA 0x02 #define G_RAID_VOLUME_RLQ_R5EELS 0x03 #define G_RAID_VOLUME_RLQ_R5RRA 0x00 #define G_RAID_VOLUME_RLQ_R5RRS 0x01 #define G_RAID_VOLUME_RLQ_R5RLA 0x02 #define G_RAID_VOLUME_RLQ_R5RLS 0x03 #define G_RAID_VOLUME_RLQ_UNKNOWN 0xff struct g_raid_volume; struct g_raid_volume { struct g_raid_softc *v_softc; /* Back-pointer to softc. */ struct g_provider *v_provider; /* GEOM provider. */ struct g_raid_subdisk v_subdisks[G_RAID_MAX_SUBDISKS]; /* Subdisks of this volume. */ void *v_md_data; /* Volume's metadata storage. */ struct g_raid_tr_object *v_tr; /* Transformation object. */ char v_name[G_RAID_MAX_VOLUMENAME]; /* Volume name. */ u_int v_state; /* Volume state. */ u_int v_raid_level; /* Array RAID level. */ u_int v_raid_level_qualifier; /* RAID level det. */ u_int v_disks_count; /* Number of disks in array. */ u_int v_mdf_pdisks; /* Number of parity disks in RAIDMDF array. */ uint16_t v_mdf_polynomial; /* Polynomial for RAIDMDF. */ uint8_t v_mdf_method; /* Generation method for RAIDMDF. */ u_int v_strip_size; /* Array strip size. */ u_int v_rotate_parity; /* Rotate RAID5R parity after numer of stripes. */ u_int v_sectorsize; /* Volume sector size. */ off_t v_mediasize; /* Volume media size. */ struct bio_queue_head v_inflight; /* In-flight write requests. */ struct bio_queue_head v_locked; /* Blocked I/O requests. */ LIST_HEAD(, g_raid_lock) v_locks; /* List of locked regions. 
*/ int v_pending_lock; /* writes to locked region */ int v_dirty; /* Volume is DIRTY. */ struct timeval v_last_done; /* Time of the last I/O. */ time_t v_last_write; /* Time of the last write. */ u_int v_writes; /* Number of active writes. */ struct root_hold_token *v_rootmount; /* Root mount delay token. */ int v_starting; /* Volume is starting */ int v_stopping; /* Volume is stopping */ int v_provider_open; /* Number of opens. */ int v_global_id; /* Global volume ID (rX). */ int v_read_only; /* Volume is read-only. */ TAILQ_ENTRY(g_raid_volume) v_next; /* List of volumes entry. */ LIST_ENTRY(g_raid_volume) v_global_next; /* Global list entry. */ }; #define G_RAID_NODE_E_WAKE 0x00 #define G_RAID_NODE_E_START 0x01 struct g_raid_softc { struct g_raid_md_object *sc_md; /* Metadata object. */ struct g_geom *sc_geom; /* GEOM class instance. */ uint64_t sc_flags; /* Additional flags. */ TAILQ_HEAD(, g_raid_volume) sc_volumes; /* List of volumes. */ TAILQ_HEAD(, g_raid_disk) sc_disks; /* List of disks. */ struct sx sc_lock; /* Main node lock. */ struct proc *sc_worker; /* Worker process. */ struct mtx sc_queue_mtx; /* Worker queues lock. */ TAILQ_HEAD(, g_raid_event) sc_events; /* Worker events queue. */ struct bio_queue_head sc_queue; /* Worker I/O queue. */ int sc_stopping; /* Node is stopping */ }; #define sc_name sc_geom->name SYSCTL_DECL(_kern_geom_raid); /* * KOBJ parent class of metadata processing modules. */ struct g_raid_md_class { KOBJ_CLASS_FIELDS; int mdc_enable; int mdc_priority; LIST_ENTRY(g_raid_md_class) mdc_list; }; /* * KOBJ instance of metadata processing module. */ struct g_raid_md_object { KOBJ_FIELDS; struct g_raid_md_class *mdo_class; struct g_raid_softc *mdo_softc; /* Back-pointer to softc. */ }; int g_raid_md_modevent(module_t, int, void *); #define G_RAID_MD_DECLARE(name, label) \ static moduledata_t g_raid_md_##name##_mod = { \ "g_raid_md_" __XSTRING(name), \ g_raid_md_modevent, \ &g_raid_md_##name##_class \ }; \ DECLARE_MODULE(g_raid_md_##name, g_raid_md_##name##_mod, \ SI_SUB_DRIVERS, SI_ORDER_SECOND); \ MODULE_DEPEND(g_raid_md_##name, geom_raid, 0, 0, 0); \ SYSCTL_NODE(_kern_geom_raid, OID_AUTO, name, CTLFLAG_RD, \ NULL, label " metadata module"); \ SYSCTL_INT(_kern_geom_raid_##name, OID_AUTO, enable, \ CTLFLAG_RWTUN, &g_raid_md_##name##_class.mdc_enable, 0, \ "Enable " label " metadata format taste") /* * KOBJ parent class of data transformation modules. */ struct g_raid_tr_class { KOBJ_CLASS_FIELDS; int trc_enable; int trc_priority; int trc_accept_unmapped; LIST_ENTRY(g_raid_tr_class) trc_list; }; /* * KOBJ instance of data transformation module. */ struct g_raid_tr_object { KOBJ_FIELDS; struct g_raid_tr_class *tro_class; struct g_raid_volume *tro_volume; /* Back-pointer to volume. 
*/ }; int g_raid_tr_modevent(module_t, int, void *); #define G_RAID_TR_DECLARE(name, label) \ static moduledata_t g_raid_tr_##name##_mod = { \ "g_raid_tr_" __XSTRING(name), \ g_raid_tr_modevent, \ &g_raid_tr_##name##_class \ }; \ DECLARE_MODULE(g_raid_tr_##name, g_raid_tr_##name##_mod, \ SI_SUB_DRIVERS, SI_ORDER_FIRST); \ MODULE_DEPEND(g_raid_tr_##name, geom_raid, 0, 0, 0); \ SYSCTL_NODE(_kern_geom_raid, OID_AUTO, name, CTLFLAG_RD, \ NULL, label " transformation module"); \ SYSCTL_INT(_kern_geom_raid_##name, OID_AUTO, enable, \ CTLFLAG_RWTUN, &g_raid_tr_##name##_class.trc_enable, 0, \ "Enable " label " transformation module taste") const char * g_raid_volume_level2str(int level, int qual); int g_raid_volume_str2level(const char *str, int *level, int *qual); const char * g_raid_volume_state2str(int state); const char * g_raid_subdisk_state2str(int state); const char * g_raid_disk_state2str(int state); struct g_raid_softc * g_raid_create_node(struct g_class *mp, const char *name, struct g_raid_md_object *md); int g_raid_create_node_format(const char *format, struct gctl_req *req, struct g_geom **gp); struct g_raid_volume * g_raid_create_volume(struct g_raid_softc *sc, const char *name, int id); struct g_raid_disk * g_raid_create_disk(struct g_raid_softc *sc); const char * g_raid_get_diskname(struct g_raid_disk *disk); void g_raid_get_disk_info(struct g_raid_disk *disk); int g_raid_start_volume(struct g_raid_volume *vol); int g_raid_destroy_node(struct g_raid_softc *sc, int worker); int g_raid_destroy_volume(struct g_raid_volume *vol); int g_raid_destroy_disk(struct g_raid_disk *disk); void g_raid_iodone(struct bio *bp, int error); void g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp); int g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd, void *virtual, vm_offset_t physical, off_t offset, size_t length); struct g_consumer *g_raid_open_consumer(struct g_raid_softc *sc, const char *name); void g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp); void g_raid_report_disk_state(struct g_raid_disk *disk); void g_raid_change_disk_state(struct g_raid_disk *disk, int state); void g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state); void g_raid_change_volume_state(struct g_raid_volume *vol, int state); void g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol, struct g_raid_subdisk *sd, struct g_raid_disk *disk); void g_raid_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd, struct g_raid_disk *disk); void g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp); int g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr, void *virtual, vm_offset_t physical, off_t offset, size_t length); u_int g_raid_ndisks(struct g_raid_softc *sc, int state); u_int g_raid_nsubdisks(struct g_raid_volume *vol, int state); u_int g_raid_nopens(struct g_raid_softc *sc); struct g_raid_subdisk * g_raid_get_subdisk(struct g_raid_volume *vol, int state); #define G_RAID_DESTROY_SOFT 0 #define G_RAID_DESTROY_DELAYED 1 #define G_RAID_DESTROY_HARD 2 int g_raid_destroy(struct g_raid_softc *sc, int how); int g_raid_event_send(void *arg, int event, int flags); int g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len, struct bio *ignore, void *argp); int g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len); g_ctl_req_t g_raid_ctl; #endif /* _KERNEL */ #endif /* !_G_RAID_H_ */ Index: head/sys/geom/raid/md_ddf.c =================================================================== --- head/sys/geom/raid/md_ddf.c 
(revision 350693) +++ head/sys/geom/raid/md_ddf.c (revision 350694) @@ -1,3089 +1,3090 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "geom/raid/md_ddf.h" #include "g_raid_md_if.h" static MALLOC_DEFINE(M_MD_DDF, "md_ddf_data", "GEOM_RAID DDF metadata"); #define DDF_MAX_DISKS_HARD 128 #define DDF_MAX_DISKS 16 #define DDF_MAX_VDISKS 7 #define DDF_MAX_PARTITIONS 1 #define DECADE (3600*24*(365*10+2)) /* 10 years in seconds. */ struct ddf_meta { u_int sectorsize; u_int bigendian; struct ddf_header *hdr; struct ddf_cd_record *cdr; struct ddf_pd_record *pdr; struct ddf_vd_record *vdr; void *cr; struct ddf_pdd_record *pdd; struct ddf_bbm_log *bbm; }; struct ddf_vol_meta { u_int sectorsize; u_int bigendian; struct ddf_header *hdr; struct ddf_cd_record *cdr; struct ddf_vd_entry *vde; struct ddf_vdc_record *vdc; struct ddf_vdc_record *bvdc[DDF_MAX_DISKS_HARD]; }; struct g_raid_md_ddf_perdisk { struct ddf_meta pd_meta; }; struct g_raid_md_ddf_pervolume { struct ddf_vol_meta pv_meta; int pv_started; struct callout pv_start_co; /* STARTING state timer. */ }; struct g_raid_md_ddf_object { struct g_raid_md_object mdio_base; u_int mdio_bigendian; struct ddf_meta mdio_meta; int mdio_starting; struct callout mdio_start_co; /* STARTING state timer. */ int mdio_started; struct root_hold_token *mdio_rootmount; /* Root mount delay token. 
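	 * Assumed behavior of the standard root_hold_token: taken while
	 * discovered volumes are still assembling so that mounting the
	 * root file system is delayed until they have started.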
*/ }; static g_raid_md_create_req_t g_raid_md_create_req_ddf; static g_raid_md_taste_t g_raid_md_taste_ddf; static g_raid_md_event_t g_raid_md_event_ddf; static g_raid_md_volume_event_t g_raid_md_volume_event_ddf; static g_raid_md_ctl_t g_raid_md_ctl_ddf; static g_raid_md_write_t g_raid_md_write_ddf; static g_raid_md_fail_disk_t g_raid_md_fail_disk_ddf; static g_raid_md_free_disk_t g_raid_md_free_disk_ddf; static g_raid_md_free_volume_t g_raid_md_free_volume_ddf; static g_raid_md_free_t g_raid_md_free_ddf; static kobj_method_t g_raid_md_ddf_methods[] = { KOBJMETHOD(g_raid_md_create_req, g_raid_md_create_req_ddf), KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_ddf), KOBJMETHOD(g_raid_md_event, g_raid_md_event_ddf), KOBJMETHOD(g_raid_md_volume_event, g_raid_md_volume_event_ddf), KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_ddf), KOBJMETHOD(g_raid_md_write, g_raid_md_write_ddf), KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_ddf), KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_ddf), KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_ddf), KOBJMETHOD(g_raid_md_free, g_raid_md_free_ddf), { 0, 0 } }; static struct g_raid_md_class g_raid_md_ddf_class = { "DDF", g_raid_md_ddf_methods, sizeof(struct g_raid_md_ddf_object), .mdc_enable = 1, .mdc_priority = 100 }; #define GET8(m, f) ((m)->f) #define GET16(m, f) ((m)->bigendian ? be16dec(&(m)->f) : le16dec(&(m)->f)) #define GET32(m, f) ((m)->bigendian ? be32dec(&(m)->f) : le32dec(&(m)->f)) #define GET64(m, f) ((m)->bigendian ? be64dec(&(m)->f) : le64dec(&(m)->f)) #define GET8D(m, f) (f) #define GET16D(m, f) ((m)->bigendian ? be16dec(&f) : le16dec(&f)) #define GET32D(m, f) ((m)->bigendian ? be32dec(&f) : le32dec(&f)) #define GET64D(m, f) ((m)->bigendian ? be64dec(&f) : le64dec(&f)) #define GET8P(m, f) (*(f)) #define GET16P(m, f) ((m)->bigendian ? be16dec(f) : le16dec(f)) #define GET32P(m, f) ((m)->bigendian ? be32dec(f) : le32dec(f)) #define GET64P(m, f) ((m)->bigendian ? 
be64dec(f) : le64dec(f)) #define SET8P(m, f, v) \ (*(f) = (v)) #define SET16P(m, f, v) \ do { \ if ((m)->bigendian) \ be16enc((f), (v)); \ else \ le16enc((f), (v)); \ } while (0) #define SET32P(m, f, v) \ do { \ if ((m)->bigendian) \ be32enc((f), (v)); \ else \ le32enc((f), (v)); \ } while (0) #define SET64P(m, f, v) \ do { \ if ((m)->bigendian) \ be64enc((f), (v)); \ else \ le64enc((f), (v)); \ } while (0) #define SET8(m, f, v) SET8P((m), &((m)->f), (v)) #define SET16(m, f, v) SET16P((m), &((m)->f), (v)) #define SET32(m, f, v) SET32P((m), &((m)->f), (v)) #define SET64(m, f, v) SET64P((m), &((m)->f), (v)) #define SET8D(m, f, v) SET8P((m), &(f), (v)) #define SET16D(m, f, v) SET16P((m), &(f), (v)) #define SET32D(m, f, v) SET32P((m), &(f), (v)) #define SET64D(m, f, v) SET64P((m), &(f), (v)) #define GETCRNUM(m) (GET32((m), hdr->cr_length) / \ GET16((m), hdr->Configuration_Record_Length)) #define GETVDCPTR(m, n) ((struct ddf_vdc_record *)((uint8_t *)(m)->cr + \ (n) * GET16((m), hdr->Configuration_Record_Length) * \ (m)->sectorsize)) #define GETSAPTR(m, n) ((struct ddf_sa_record *)((uint8_t *)(m)->cr + \ (n) * GET16((m), hdr->Configuration_Record_Length) * \ (m)->sectorsize)) static int isff(uint8_t *buf, int size) { int i; for (i = 0; i < size; i++) if (buf[i] != 0xff) return (0); return (1); } static void print_guid(uint8_t *buf) { int i, ascii; ascii = 1; for (i = 0; i < 24; i++) { if (buf[i] != 0 && (buf[i] < ' ' || buf[i] > 127)) { ascii = 0; break; } } if (ascii) { printf("'%.24s'", buf); } else { for (i = 0; i < 24; i++) printf("%02x", buf[i]); } } static void g_raid_md_ddf_print(struct ddf_meta *meta) { struct ddf_vdc_record *vdc; struct ddf_vuc_record *vuc; struct ddf_sa_record *sa; uint64_t *val2; uint32_t val; int i, j, k, num, num2; if (g_raid_debug < 1) return; printf("********* DDF Metadata *********\n"); printf("**** Header ****\n"); printf("DDF_Header_GUID "); print_guid(meta->hdr->DDF_Header_GUID); printf("\n"); printf("DDF_rev %8.8s\n", (char *)&meta->hdr->DDF_rev[0]); printf("Sequence_Number 0x%08x\n", GET32(meta, hdr->Sequence_Number)); printf("TimeStamp 0x%08x\n", GET32(meta, hdr->TimeStamp)); printf("Open_Flag 0x%02x\n", GET16(meta, hdr->Open_Flag)); printf("Foreign_Flag 0x%02x\n", GET16(meta, hdr->Foreign_Flag)); printf("Diskgrouping 0x%02x\n", GET16(meta, hdr->Diskgrouping)); printf("Primary_Header_LBA %ju\n", GET64(meta, hdr->Primary_Header_LBA)); printf("Secondary_Header_LBA %ju\n", GET64(meta, hdr->Secondary_Header_LBA)); printf("WorkSpace_Length %u\n", GET32(meta, hdr->WorkSpace_Length)); printf("WorkSpace_LBA %ju\n", GET64(meta, hdr->WorkSpace_LBA)); printf("Max_PD_Entries %u\n", GET16(meta, hdr->Max_PD_Entries)); printf("Max_VD_Entries %u\n", GET16(meta, hdr->Max_VD_Entries)); printf("Max_Partitions %u\n", GET16(meta, hdr->Max_Partitions)); printf("Configuration_Record_Length %u\n", GET16(meta, hdr->Configuration_Record_Length)); printf("Max_Primary_Element_Entries %u\n", GET16(meta, hdr->Max_Primary_Element_Entries)); printf("Controller Data %u:%u\n", GET32(meta, hdr->cd_section), GET32(meta, hdr->cd_length)); printf("Physical Disk %u:%u\n", GET32(meta, hdr->pdr_section), GET32(meta, hdr->pdr_length)); printf("Virtual Disk %u:%u\n", GET32(meta, hdr->vdr_section), GET32(meta, hdr->vdr_length)); printf("Configuration Recs %u:%u\n", GET32(meta, hdr->cr_section), GET32(meta, hdr->cr_length)); printf("Physical Disk Recs %u:%u\n", GET32(meta, hdr->pdd_section), GET32(meta, hdr->pdd_length)); printf("BBM Log %u:%u\n", GET32(meta, hdr->bbmlog_section), GET32(meta, 
hdr->bbmlog_length)); printf("Diagnostic Space %u:%u\n", GET32(meta, hdr->Diagnostic_Space), GET32(meta, hdr->Diagnostic_Space_Length)); printf("Vendor_Specific_Logs %u:%u\n", GET32(meta, hdr->Vendor_Specific_Logs), GET32(meta, hdr->Vendor_Specific_Logs_Length)); printf("**** Controller Data ****\n"); printf("Controller_GUID "); print_guid(meta->cdr->Controller_GUID); printf("\n"); printf("Controller_Type 0x%04x%04x 0x%04x%04x\n", GET16(meta, cdr->Controller_Type.Vendor_ID), GET16(meta, cdr->Controller_Type.Device_ID), GET16(meta, cdr->Controller_Type.SubVendor_ID), GET16(meta, cdr->Controller_Type.SubDevice_ID)); printf("Product_ID '%.16s'\n", (char *)&meta->cdr->Product_ID[0]); printf("**** Physical Disk Records ****\n"); printf("Populated_PDEs %u\n", GET16(meta, pdr->Populated_PDEs)); printf("Max_PDE_Supported %u\n", GET16(meta, pdr->Max_PDE_Supported)); for (j = 0; j < GET16(meta, pdr->Populated_PDEs); j++) { if (isff(meta->pdr->entry[j].PD_GUID, 24)) continue; if (GET32(meta, pdr->entry[j].PD_Reference) == 0xffffffff) continue; printf("PD_GUID "); print_guid(meta->pdr->entry[j].PD_GUID); printf("\n"); printf("PD_Reference 0x%08x\n", GET32(meta, pdr->entry[j].PD_Reference)); printf("PD_Type 0x%04x\n", GET16(meta, pdr->entry[j].PD_Type)); printf("PD_State 0x%04x\n", GET16(meta, pdr->entry[j].PD_State)); printf("Configured_Size %ju\n", GET64(meta, pdr->entry[j].Configured_Size)); printf("Block_Size %u\n", GET16(meta, pdr->entry[j].Block_Size)); } printf("**** Virtual Disk Records ****\n"); printf("Populated_VDEs %u\n", GET16(meta, vdr->Populated_VDEs)); printf("Max_VDE_Supported %u\n", GET16(meta, vdr->Max_VDE_Supported)); for (j = 0; j < GET16(meta, vdr->Populated_VDEs); j++) { if (isff(meta->vdr->entry[j].VD_GUID, 24)) continue; printf("VD_GUID "); print_guid(meta->vdr->entry[j].VD_GUID); printf("\n"); printf("VD_Number 0x%04x\n", GET16(meta, vdr->entry[j].VD_Number)); printf("VD_Type 0x%04x\n", GET16(meta, vdr->entry[j].VD_Type)); printf("VD_State 0x%02x\n", GET8(meta, vdr->entry[j].VD_State)); printf("Init_State 0x%02x\n", GET8(meta, vdr->entry[j].Init_State)); printf("Drive_Failures_Remaining %u\n", GET8(meta, vdr->entry[j].Drive_Failures_Remaining)); printf("VD_Name '%.16s'\n", (char *)&meta->vdr->entry[j].VD_Name); } printf("**** Configuration Records ****\n"); num = GETCRNUM(meta); for (j = 0; j < num; j++) { vdc = GETVDCPTR(meta, j); val = GET32D(meta, vdc->Signature); switch (val) { case DDF_VDCR_SIGNATURE: printf("** Virtual Disk Configuration **\n"); printf("VD_GUID "); print_guid(vdc->VD_GUID); printf("\n"); printf("Timestamp 0x%08x\n", GET32D(meta, vdc->Timestamp)); printf("Sequence_Number 0x%08x\n", GET32D(meta, vdc->Sequence_Number)); printf("Primary_Element_Count %u\n", GET16D(meta, vdc->Primary_Element_Count)); printf("Stripe_Size %u\n", GET8D(meta, vdc->Stripe_Size)); printf("Primary_RAID_Level 0x%02x\n", GET8D(meta, vdc->Primary_RAID_Level)); printf("RLQ 0x%02x\n", GET8D(meta, vdc->RLQ)); printf("Secondary_Element_Count %u\n", GET8D(meta, vdc->Secondary_Element_Count)); printf("Secondary_Element_Seq %u\n", GET8D(meta, vdc->Secondary_Element_Seq)); printf("Secondary_RAID_Level 0x%02x\n", GET8D(meta, vdc->Secondary_RAID_Level)); printf("Block_Count %ju\n", GET64D(meta, vdc->Block_Count)); printf("VD_Size %ju\n", GET64D(meta, vdc->VD_Size)); printf("Block_Size %u\n", GET16D(meta, vdc->Block_Size)); printf("Rotate_Parity_count %u\n", GET8D(meta, vdc->Rotate_Parity_count)); printf("Associated_Spare_Disks"); for (i = 0; i < 8; i++) { if (GET32D(meta, 
vdc->Associated_Spares[i]) != 0xffffffff) printf(" 0x%08x", GET32D(meta, vdc->Associated_Spares[i])); } printf("\n"); printf("Cache_Flags %016jx\n", GET64D(meta, vdc->Cache_Flags)); printf("BG_Rate %u\n", GET8D(meta, vdc->BG_Rate)); printf("MDF_Parity_Disks %u\n", GET8D(meta, vdc->MDF_Parity_Disks)); printf("MDF_Parity_Generator_Polynomial 0x%04x\n", GET16D(meta, vdc->MDF_Parity_Generator_Polynomial)); printf("MDF_Constant_Generation_Method 0x%02x\n", GET8D(meta, vdc->MDF_Constant_Generation_Method)); printf("Physical_Disks "); num2 = GET16D(meta, vdc->Primary_Element_Count); val2 = (uint64_t *)&(vdc->Physical_Disk_Sequence[GET16(meta, hdr->Max_Primary_Element_Entries)]); for (i = 0; i < num2; i++) printf(" 0x%08x @ %ju", GET32D(meta, vdc->Physical_Disk_Sequence[i]), GET64P(meta, val2 + i)); printf("\n"); break; case DDF_VUCR_SIGNATURE: printf("** Vendor Unique Configuration **\n"); vuc = (struct ddf_vuc_record *)vdc; printf("VD_GUID "); print_guid(vuc->VD_GUID); printf("\n"); break; case DDF_SA_SIGNATURE: printf("** Spare Assignment Configuration **\n"); sa = (struct ddf_sa_record *)vdc; printf("Timestamp 0x%08x\n", GET32D(meta, sa->Timestamp)); printf("Spare_Type 0x%02x\n", GET8D(meta, sa->Spare_Type)); printf("Populated_SAEs %u\n", GET16D(meta, sa->Populated_SAEs)); printf("MAX_SAE_Supported %u\n", GET16D(meta, sa->MAX_SAE_Supported)); for (i = 0; i < GET16D(meta, sa->Populated_SAEs); i++) { if (isff(sa->entry[i].VD_GUID, 24)) continue; printf("VD_GUID "); for (k = 0; k < 24; k++) printf("%02x", sa->entry[i].VD_GUID[k]); printf("\n"); printf("Secondary_Element %u\n", GET16D(meta, sa->entry[i].Secondary_Element)); } break; case 0x00000000: case 0xFFFFFFFF: break; default: printf("Unknown configuration signature %08x\n", val); break; } } printf("**** Physical Disk Data ****\n"); printf("PD_GUID "); print_guid(meta->pdd->PD_GUID); printf("\n"); printf("PD_Reference 0x%08x\n", GET32(meta, pdd->PD_Reference)); printf("Forced_Ref_Flag 0x%02x\n", GET8(meta, pdd->Forced_Ref_Flag)); printf("Forced_PD_GUID_Flag 0x%02x\n", GET8(meta, pdd->Forced_PD_GUID_Flag)); } static int ddf_meta_find_pd(struct ddf_meta *meta, uint8_t *GUID, uint32_t PD_Reference) { int i; for (i = 0; i < GET16(meta, pdr->Populated_PDEs); i++) { if (GUID != NULL) { if (memcmp(meta->pdr->entry[i].PD_GUID, GUID, 24) == 0) return (i); } else if (PD_Reference != 0xffffffff) { if (GET32(meta, pdr->entry[i].PD_Reference) == PD_Reference) return (i); } else if (isff(meta->pdr->entry[i].PD_GUID, 24)) return (i); } if (GUID == NULL && PD_Reference == 0xffffffff) { if (i >= GET16(meta, pdr->Max_PDE_Supported)) return (-1); SET16(meta, pdr->Populated_PDEs, i + 1); return (i); } return (-1); } static int ddf_meta_find_vd(struct ddf_meta *meta, uint8_t *GUID) { int i; for (i = 0; i < GET16(meta, vdr->Populated_VDEs); i++) { if (GUID != NULL) { if (memcmp(meta->vdr->entry[i].VD_GUID, GUID, 24) == 0) return (i); } else if (isff(meta->vdr->entry[i].VD_GUID, 24)) return (i); } if (GUID == NULL) { if (i >= GET16(meta, vdr->Max_VDE_Supported)) return (-1); SET16(meta, vdr->Populated_VDEs, i + 1); return (i); } return (-1); } static struct ddf_vdc_record * ddf_meta_find_vdc(struct ddf_meta *meta, uint8_t *GUID) { struct ddf_vdc_record *vdc; int i, num; num = GETCRNUM(meta); for (i = 0; i < num; i++) { vdc = GETVDCPTR(meta, i); if (GUID != NULL) { if (GET32D(meta, vdc->Signature) == DDF_VDCR_SIGNATURE && memcmp(vdc->VD_GUID, GUID, 24) == 0) return (vdc); } else if (GET32D(meta, vdc->Signature) == 0xffffffff || GET32D(meta, vdc->Signature) == 0) 
return (vdc); } return (NULL); } static int ddf_meta_count_vdc(struct ddf_meta *meta, uint8_t *GUID) { struct ddf_vdc_record *vdc; int i, num, cnt; cnt = 0; num = GETCRNUM(meta); for (i = 0; i < num; i++) { vdc = GETVDCPTR(meta, i); if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE) continue; if (GUID == NULL || memcmp(vdc->VD_GUID, GUID, 24) == 0) cnt++; } return (cnt); } static int ddf_meta_find_disk(struct ddf_vol_meta *vmeta, uint32_t PD_Reference, int *bvdp, int *posp) { int i, bvd, pos; i = 0; for (bvd = 0; bvd < GET8(vmeta, vdc->Secondary_Element_Count); bvd++) { if (vmeta->bvdc[bvd] == NULL) { i += GET16(vmeta, vdc->Primary_Element_Count); // XXX continue; } for (pos = 0; pos < GET16(vmeta, bvdc[bvd]->Primary_Element_Count); pos++, i++) { if (GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]) == PD_Reference) { if (bvdp != NULL) *bvdp = bvd; if (posp != NULL) *posp = pos; return (i); } } } return (-1); } static struct ddf_sa_record * ddf_meta_find_sa(struct ddf_meta *meta, int create) { struct ddf_sa_record *sa; int i, num; num = GETCRNUM(meta); for (i = 0; i < num; i++) { sa = GETSAPTR(meta, i); if (GET32D(meta, sa->Signature) == DDF_SA_SIGNATURE) return (sa); } if (create) { for (i = 0; i < num; i++) { sa = GETSAPTR(meta, i); if (GET32D(meta, sa->Signature) == 0xffffffff || GET32D(meta, sa->Signature) == 0) return (sa); } } return (NULL); } static void ddf_meta_create(struct g_raid_disk *disk, struct ddf_meta *sample) { struct timespec ts; struct clocktime ct; struct g_raid_md_ddf_perdisk *pd; struct g_raid_md_ddf_object *mdi; struct ddf_meta *meta; struct ddf_pd_entry *pde; off_t anchorlba; u_int ss, pos, size; int len, error; char serial_buffer[DISK_IDENT_SIZE]; if (sample->hdr == NULL) sample = NULL; mdi = (struct g_raid_md_ddf_object *)disk->d_softc->sc_md; pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; meta = &pd->pd_meta; ss = disk->d_consumer->provider->sectorsize; anchorlba = disk->d_consumer->provider->mediasize / ss - 1; meta->sectorsize = ss; meta->bigendian = sample ? 
sample->bigendian : mdi->mdio_bigendian; getnanotime(&ts); clock_ts_to_ct(&ts, &ct); /* Header */ meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK); memset(meta->hdr, 0xff, ss); if (sample) { memcpy(meta->hdr, sample->hdr, sizeof(struct ddf_header)); if (ss != sample->sectorsize) { SET32(meta, hdr->WorkSpace_Length, howmany(GET32(sample, hdr->WorkSpace_Length) * sample->sectorsize, ss)); SET16(meta, hdr->Configuration_Record_Length, howmany(GET16(sample, hdr->Configuration_Record_Length) * sample->sectorsize, ss)); SET32(meta, hdr->cd_length, howmany(GET32(sample, hdr->cd_length) * sample->sectorsize, ss)); SET32(meta, hdr->pdr_length, howmany(GET32(sample, hdr->pdr_length) * sample->sectorsize, ss)); SET32(meta, hdr->vdr_length, howmany(GET32(sample, hdr->vdr_length) * sample->sectorsize, ss)); SET32(meta, hdr->cr_length, howmany(GET32(sample, hdr->cr_length) * sample->sectorsize, ss)); SET32(meta, hdr->pdd_length, howmany(GET32(sample, hdr->pdd_length) * sample->sectorsize, ss)); SET32(meta, hdr->bbmlog_length, howmany(GET32(sample, hdr->bbmlog_length) * sample->sectorsize, ss)); SET32(meta, hdr->Diagnostic_Space, howmany(GET32(sample, hdr->bbmlog_length) * sample->sectorsize, ss)); SET32(meta, hdr->Vendor_Specific_Logs, howmany(GET32(sample, hdr->bbmlog_length) * sample->sectorsize, ss)); } } else { SET32(meta, hdr->Signature, DDF_HEADER_SIGNATURE); snprintf(meta->hdr->DDF_Header_GUID, 25, "FreeBSD %08x%08x", (u_int)(ts.tv_sec - DECADE), arc4random()); memcpy(meta->hdr->DDF_rev, "02.00.00", 8); SET32(meta, hdr->TimeStamp, (ts.tv_sec - DECADE)); SET32(meta, hdr->WorkSpace_Length, 16 * 1024 * 1024 / ss); SET16(meta, hdr->Max_PD_Entries, DDF_MAX_DISKS - 1); SET16(meta, hdr->Max_VD_Entries, DDF_MAX_VDISKS); SET16(meta, hdr->Max_Partitions, DDF_MAX_PARTITIONS); SET16(meta, hdr->Max_Primary_Element_Entries, DDF_MAX_DISKS); SET16(meta, hdr->Configuration_Record_Length, howmany(sizeof(struct ddf_vdc_record) + (4 + 8) * GET16(meta, hdr->Max_Primary_Element_Entries), ss)); SET32(meta, hdr->cd_length, howmany(sizeof(struct ddf_cd_record), ss)); SET32(meta, hdr->pdr_length, howmany(sizeof(struct ddf_pd_record) + sizeof(struct ddf_pd_entry) * GET16(meta, hdr->Max_PD_Entries), ss)); SET32(meta, hdr->vdr_length, howmany(sizeof(struct ddf_vd_record) + sizeof(struct ddf_vd_entry) * GET16(meta, hdr->Max_VD_Entries), ss)); SET32(meta, hdr->cr_length, GET16(meta, hdr->Configuration_Record_Length) * (GET16(meta, hdr->Max_Partitions) + 1)); SET32(meta, hdr->pdd_length, howmany(sizeof(struct ddf_pdd_record), ss)); SET32(meta, hdr->bbmlog_length, 0); SET32(meta, hdr->Diagnostic_Space_Length, 0); SET32(meta, hdr->Vendor_Specific_Logs_Length, 0); } pos = 1; SET32(meta, hdr->cd_section, pos); pos += GET32(meta, hdr->cd_length); SET32(meta, hdr->pdr_section, pos); pos += GET32(meta, hdr->pdr_length); SET32(meta, hdr->vdr_section, pos); pos += GET32(meta, hdr->vdr_length); SET32(meta, hdr->cr_section, pos); pos += GET32(meta, hdr->cr_length); SET32(meta, hdr->pdd_section, pos); pos += GET32(meta, hdr->pdd_length); SET32(meta, hdr->bbmlog_section, GET32(meta, hdr->bbmlog_length) != 0 ? pos : 0xffffffff); pos += GET32(meta, hdr->bbmlog_length); SET32(meta, hdr->Diagnostic_Space, GET32(meta, hdr->Diagnostic_Space_Length) != 0 ? pos : 0xffffffff); pos += GET32(meta, hdr->Diagnostic_Space_Length); SET32(meta, hdr->Vendor_Specific_Logs, GET32(meta, hdr->Vendor_Specific_Logs_Length) != 0 ? 
pos : 0xffffffff); pos += min(GET32(meta, hdr->Vendor_Specific_Logs_Length), 1); SET64(meta, hdr->Primary_Header_LBA, anchorlba - pos); SET64(meta, hdr->Secondary_Header_LBA, 0xffffffffffffffffULL); SET64(meta, hdr->WorkSpace_LBA, anchorlba + 1 - 32 * 1024 * 1024 / ss); /* Controller Data */ size = GET32(meta, hdr->cd_length) * ss; meta->cdr = malloc(size, M_MD_DDF, M_WAITOK); memset(meta->cdr, 0xff, size); SET32(meta, cdr->Signature, DDF_CONTROLLER_DATA_SIGNATURE); memcpy(meta->cdr->Controller_GUID, "FreeBSD GEOM RAID SERIAL", 24); memcpy(meta->cdr->Product_ID, "FreeBSD GEOMRAID", 16); /* Physical Drive Records. */ size = GET32(meta, hdr->pdr_length) * ss; meta->pdr = malloc(size, M_MD_DDF, M_WAITOK); memset(meta->pdr, 0xff, size); SET32(meta, pdr->Signature, DDF_PDR_SIGNATURE); SET16(meta, pdr->Populated_PDEs, 1); SET16(meta, pdr->Max_PDE_Supported, GET16(meta, hdr->Max_PD_Entries)); pde = &meta->pdr->entry[0]; len = sizeof(serial_buffer); error = g_io_getattr("GEOM::ident", disk->d_consumer, &len, serial_buffer); if (error == 0 && (len = strlen (serial_buffer)) >= 6 && len <= 20) snprintf(pde->PD_GUID, 25, "DISK%20s", serial_buffer); else snprintf(pde->PD_GUID, 25, "DISK%04d%02d%02d%08x%04x", ct.year, ct.mon, ct.day, arc4random(), arc4random() & 0xffff); SET32D(meta, pde->PD_Reference, arc4random()); SET16D(meta, pde->PD_Type, DDF_PDE_GUID_FORCE); SET16D(meta, pde->PD_State, 0); SET64D(meta, pde->Configured_Size, anchorlba + 1 - 32 * 1024 * 1024 / ss); SET16D(meta, pde->Block_Size, ss); /* Virtual Drive Records. */ size = GET32(meta, hdr->vdr_length) * ss; meta->vdr = malloc(size, M_MD_DDF, M_WAITOK); memset(meta->vdr, 0xff, size); SET32(meta, vdr->Signature, DDF_VD_RECORD_SIGNATURE); SET32(meta, vdr->Populated_VDEs, 0); SET16(meta, vdr->Max_VDE_Supported, GET16(meta, hdr->Max_VD_Entries)); /* Configuration Records. */ size = GET32(meta, hdr->cr_length) * ss; meta->cr = malloc(size, M_MD_DDF, M_WAITOK); memset(meta->cr, 0xff, size); /* Physical Disk Data. */ size = GET32(meta, hdr->pdd_length) * ss; meta->pdd = malloc(size, M_MD_DDF, M_WAITOK); memset(meta->pdd, 0xff, size); SET32(meta, pdd->Signature, DDF_PDD_SIGNATURE); memcpy(meta->pdd->PD_GUID, pde->PD_GUID, 24); SET32(meta, pdd->PD_Reference, GET32D(meta, pde->PD_Reference)); SET8(meta, pdd->Forced_Ref_Flag, DDF_PDD_FORCED_REF); SET8(meta, pdd->Forced_PD_GUID_Flag, DDF_PDD_FORCED_GUID); /* Bad Block Management Log. 
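	 * The BBM log is optional: it is allocated below only when the
	 * header reports a non-zero bbmlog_length.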
*/ if (GET32(meta, hdr->bbmlog_length) != 0) { size = GET32(meta, hdr->bbmlog_length) * ss; meta->bbm = malloc(size, M_MD_DDF, M_WAITOK); memset(meta->bbm, 0xff, size); SET32(meta, bbm->Signature, DDF_BBML_SIGNATURE); SET32(meta, bbm->Entry_Count, 0); SET32(meta, bbm->Spare_Block_Count, 0); } } static void ddf_meta_copy(struct ddf_meta *dst, struct ddf_meta *src) { u_int ss; dst->bigendian = src->bigendian; ss = dst->sectorsize = src->sectorsize; dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK); memcpy(dst->hdr, src->hdr, ss); dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss); dst->pdr = malloc(GET32(src, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK); memcpy(dst->pdr, src->pdr, GET32(src, hdr->pdr_length) * ss); dst->vdr = malloc(GET32(src, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK); memcpy(dst->vdr, src->vdr, GET32(src, hdr->vdr_length) * ss); dst->cr = malloc(GET32(src, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK); memcpy(dst->cr, src->cr, GET32(src, hdr->cr_length) * ss); dst->pdd = malloc(GET32(src, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK); memcpy(dst->pdd, src->pdd, GET32(src, hdr->pdd_length) * ss); if (src->bbm != NULL) { dst->bbm = malloc(GET32(src, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK); memcpy(dst->bbm, src->bbm, GET32(src, hdr->bbmlog_length) * ss); } } static void ddf_meta_update(struct ddf_meta *meta, struct ddf_meta *src) { struct ddf_pd_entry *pde, *spde; int i, j; for (i = 0; i < GET16(src, pdr->Populated_PDEs); i++) { spde = &src->pdr->entry[i]; if (isff(spde->PD_GUID, 24)) continue; j = ddf_meta_find_pd(meta, NULL, GET32(src, pdr->entry[i].PD_Reference)); if (j < 0) { j = ddf_meta_find_pd(meta, NULL, 0xffffffff); pde = &meta->pdr->entry[j]; memcpy(pde, spde, sizeof(*pde)); } else { pde = &meta->pdr->entry[j]; SET16D(meta, pde->PD_State, GET16D(meta, pde->PD_State) | GET16D(src, pde->PD_State)); } } } static void ddf_meta_free(struct ddf_meta *meta) { if (meta->hdr != NULL) { free(meta->hdr, M_MD_DDF); meta->hdr = NULL; } if (meta->cdr != NULL) { free(meta->cdr, M_MD_DDF); meta->cdr = NULL; } if (meta->pdr != NULL) { free(meta->pdr, M_MD_DDF); meta->pdr = NULL; } if (meta->vdr != NULL) { free(meta->vdr, M_MD_DDF); meta->vdr = NULL; } if (meta->cr != NULL) { free(meta->cr, M_MD_DDF); meta->cr = NULL; } if (meta->pdd != NULL) { free(meta->pdd, M_MD_DDF); meta->pdd = NULL; } if (meta->bbm != NULL) { free(meta->bbm, M_MD_DDF); meta->bbm = NULL; } } static void ddf_vol_meta_create(struct ddf_vol_meta *meta, struct ddf_meta *sample) { struct timespec ts; struct clocktime ct; u_int ss, size; meta->bigendian = sample->bigendian; ss = meta->sectorsize = sample->sectorsize; meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK); memcpy(meta->hdr, sample->hdr, ss); meta->cdr = malloc(GET32(sample, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); memcpy(meta->cdr, sample->cdr, GET32(sample, hdr->cd_length) * ss); meta->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK); memset(meta->vde, 0xff, sizeof(struct ddf_vd_entry)); getnanotime(&ts); clock_ts_to_ct(&ts, &ct); snprintf(meta->vde->VD_GUID, 25, "FreeBSD%04d%02d%02d%08x%01x", ct.year, ct.mon, ct.day, arc4random(), arc4random() & 0xf); size = GET16(sample, hdr->Configuration_Record_Length) * ss; meta->vdc = malloc(size, M_MD_DDF, M_WAITOK); memset(meta->vdc, 0xff, size); SET32(meta, vdc->Signature, DDF_VDCR_SIGNATURE); memcpy(meta->vdc->VD_GUID, meta->vde->VD_GUID, 24); SET32(meta, vdc->Sequence_Number, 0); } static void ddf_vol_meta_update(struct ddf_vol_meta 
*dst, struct ddf_meta *src, uint8_t *GUID, int started) { struct ddf_vd_entry *vde; struct ddf_vdc_record *vdc; int vnew, bvnew, bvd, size; u_int ss; vde = &src->vdr->entry[ddf_meta_find_vd(src, GUID)]; vdc = ddf_meta_find_vdc(src, GUID); if (GET8D(src, vdc->Secondary_Element_Count) == 1) bvd = 0; else bvd = GET8D(src, vdc->Secondary_Element_Seq); size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize; if (dst->vdc == NULL || (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) - GET32(dst, vdc->Sequence_Number))) > 0)) vnew = 1; else vnew = 0; if (dst->bvdc[bvd] == NULL || (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) - GET32(dst, bvdc[bvd]->Sequence_Number))) > 0)) bvnew = 1; else bvnew = 0; if (vnew) { dst->bigendian = src->bigendian; ss = dst->sectorsize = src->sectorsize; if (dst->hdr != NULL) free(dst->hdr, M_MD_DDF); dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK); memcpy(dst->hdr, src->hdr, ss); if (dst->cdr != NULL) free(dst->cdr, M_MD_DDF); dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss); if (dst->vde != NULL) free(dst->vde, M_MD_DDF); dst->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK); memcpy(dst->vde, vde, sizeof(struct ddf_vd_entry)); if (dst->vdc != NULL) free(dst->vdc, M_MD_DDF); dst->vdc = malloc(size, M_MD_DDF, M_WAITOK); memcpy(dst->vdc, vdc, size); } if (bvnew) { if (dst->bvdc[bvd] != NULL) free(dst->bvdc[bvd], M_MD_DDF); dst->bvdc[bvd] = malloc(size, M_MD_DDF, M_WAITOK); memcpy(dst->bvdc[bvd], vdc, size); } } static void ddf_vol_meta_free(struct ddf_vol_meta *meta) { int i; if (meta->hdr != NULL) { free(meta->hdr, M_MD_DDF); meta->hdr = NULL; } if (meta->cdr != NULL) { free(meta->cdr, M_MD_DDF); meta->cdr = NULL; } if (meta->vde != NULL) { free(meta->vde, M_MD_DDF); meta->vde = NULL; } if (meta->vdc != NULL) { free(meta->vdc, M_MD_DDF); meta->vdc = NULL; } for (i = 0; i < DDF_MAX_DISKS_HARD; i++) { if (meta->bvdc[i] != NULL) { free(meta->bvdc[i], M_MD_DDF); meta->bvdc[i] = NULL; } } } static int ddf_meta_unused_range(struct ddf_meta *meta, off_t *off, off_t *size) { struct ddf_vdc_record *vdc; off_t beg[32], end[32], beg1, end1; uint64_t *offp; int i, j, n, num, pos; uint32_t ref; *off = 0; *size = 0; ref = GET32(meta, pdd->PD_Reference); pos = ddf_meta_find_pd(meta, NULL, ref); beg[0] = 0; end[0] = GET64(meta, pdr->entry[pos].Configured_Size); n = 1; num = GETCRNUM(meta); for (i = 0; i < num; i++) { vdc = GETVDCPTR(meta, i); if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE) continue; for (pos = 0; pos < GET16D(meta, vdc->Primary_Element_Count); pos++) if (GET32D(meta, vdc->Physical_Disk_Sequence[pos]) == ref) break; if (pos == GET16D(meta, vdc->Primary_Element_Count)) continue; offp = (uint64_t *)&(vdc->Physical_Disk_Sequence[ GET16(meta, hdr->Max_Primary_Element_Entries)]); beg1 = GET64P(meta, offp + pos); end1 = beg1 + GET64D(meta, vdc->Block_Count); for (j = 0; j < n; j++) { if (beg[j] >= end1 || end[j] <= beg1 ) continue; if (beg[j] < beg1 && end[j] > end1) { beg[n] = end1; end[n] = end[j]; end[j] = beg1; n++; } else if (beg[j] < beg1) end[j] = beg1; else beg[j] = end1; } } for (j = 0; j < n; j++) { if (end[j] - beg[j] > *size) { *off = beg[j]; *size = end[j] - beg[j]; } } return ((*size > 0) ? 
1 : 0); } static void ddf_meta_get_name(struct ddf_meta *meta, int num, char *buf) { const char *b; int i; b = meta->vdr->entry[num].VD_Name; for (i = 15; i >= 0; i--) if (b[i] != 0x20) break; memcpy(buf, b, i + 1); buf[i + 1] = 0; } static void ddf_meta_put_name(struct ddf_vol_meta *meta, char *buf) { int len; len = min(strlen(buf), 16); memset(meta->vde->VD_Name, 0x20, 16); memcpy(meta->vde->VD_Name, buf, len); } static int ddf_meta_read(struct g_consumer *cp, struct ddf_meta *meta) { struct g_provider *pp; struct ddf_header *ahdr, *hdr; char *abuf, *buf; off_t plba, slba, lba; int error, len, i; u_int ss; uint32_t val; ddf_meta_free(meta); pp = cp->provider; ss = meta->sectorsize = pp->sectorsize; /* Read anchor block. */ abuf = g_read_data(cp, pp->mediasize - ss, ss, &error); if (abuf == NULL) { G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", pp->name, error); return (error); } ahdr = (struct ddf_header *)abuf; /* Check if this is an DDF RAID struct */ if (be32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE) meta->bigendian = 1; else if (le32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE) meta->bigendian = 0; else { G_RAID_DEBUG(1, "DDF signature check failed on %s", pp->name); error = EINVAL; goto done; } if (ahdr->Header_Type != DDF_HEADER_ANCHOR) { G_RAID_DEBUG(1, "DDF header type check failed on %s", pp->name); error = EINVAL; goto done; } meta->hdr = ahdr; plba = GET64(meta, hdr->Primary_Header_LBA); slba = GET64(meta, hdr->Secondary_Header_LBA); val = GET32(meta, hdr->CRC); SET32(meta, hdr->CRC, 0xffffffff); meta->hdr = NULL; if (crc32(ahdr, ss) != val) { G_RAID_DEBUG(1, "DDF CRC mismatch on %s", pp->name); error = EINVAL; goto done; } if ((plba + 6) * ss >= pp->mediasize) { G_RAID_DEBUG(1, "DDF primary header LBA is wrong on %s", pp->name); error = EINVAL; goto done; } if (slba != -1 && (slba + 6) * ss >= pp->mediasize) { G_RAID_DEBUG(1, "DDF secondary header LBA is wrong on %s", pp->name); error = EINVAL; goto done; } lba = plba; doread: error = 0; ddf_meta_free(meta); /* Read header block. */ buf = g_read_data(cp, lba * ss, ss, &error); if (buf == NULL) { readerror: G_RAID_DEBUG(1, "DDF %s metadata read error on %s (error=%d).", (lba == plba) ? "primary" : "secondary", pp->name, error); if (lba == plba && slba != -1) { lba = slba; goto doread; } G_RAID_DEBUG(1, "DDF metadata read error on %s.", pp->name); goto done; } meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK); memcpy(meta->hdr, buf, ss); g_free(buf); hdr = meta->hdr; val = GET32(meta, hdr->CRC); SET32(meta, hdr->CRC, 0xffffffff); if (hdr->Signature != ahdr->Signature || crc32(meta->hdr, ss) != val || memcmp(hdr->DDF_Header_GUID, ahdr->DDF_Header_GUID, 24) || GET64(meta, hdr->Primary_Header_LBA) != plba || GET64(meta, hdr->Secondary_Header_LBA) != slba) { hdrerror: G_RAID_DEBUG(1, "DDF %s metadata check failed on %s", (lba == plba) ? 
"primary" : "secondary", pp->name); if (lba == plba && slba != -1) { lba = slba; goto doread; } G_RAID_DEBUG(1, "DDF metadata check failed on %s", pp->name); error = EINVAL; goto done; } if ((lba == plba && hdr->Header_Type != DDF_HEADER_PRIMARY) || (lba == slba && hdr->Header_Type != DDF_HEADER_SECONDARY)) goto hdrerror; len = 1; len = max(len, GET32(meta, hdr->cd_section) + GET32(meta, hdr->cd_length)); len = max(len, GET32(meta, hdr->pdr_section) + GET32(meta, hdr->pdr_length)); len = max(len, GET32(meta, hdr->vdr_section) + GET32(meta, hdr->vdr_length)); len = max(len, GET32(meta, hdr->cr_section) + GET32(meta, hdr->cr_length)); len = max(len, GET32(meta, hdr->pdd_section) + GET32(meta, hdr->pdd_length)); if ((val = GET32(meta, hdr->bbmlog_section)) != 0xffffffff) len = max(len, val + GET32(meta, hdr->bbmlog_length)); if ((val = GET32(meta, hdr->Diagnostic_Space)) != 0xffffffff) len = max(len, val + GET32(meta, hdr->Diagnostic_Space_Length)); if ((val = GET32(meta, hdr->Vendor_Specific_Logs)) != 0xffffffff) len = max(len, val + GET32(meta, hdr->Vendor_Specific_Logs_Length)); if ((plba + len) * ss >= pp->mediasize) goto hdrerror; if (slba != -1 && (slba + len) * ss >= pp->mediasize) goto hdrerror; /* Workaround for Adaptec implementation. */ if (GET16(meta, hdr->Max_Primary_Element_Entries) == 0xffff) { SET16(meta, hdr->Max_Primary_Element_Entries, min(GET16(meta, hdr->Max_PD_Entries), (GET16(meta, hdr->Configuration_Record_Length) * ss - 512) / 12)); } if (GET32(meta, hdr->cd_length) * ss >= MAXPHYS || GET32(meta, hdr->pdr_length) * ss >= MAXPHYS || GET32(meta, hdr->vdr_length) * ss >= MAXPHYS || GET32(meta, hdr->cr_length) * ss >= MAXPHYS || GET32(meta, hdr->pdd_length) * ss >= MAXPHYS || GET32(meta, hdr->bbmlog_length) * ss >= MAXPHYS) { G_RAID_DEBUG(1, "%s: Blocksize is too big.", pp->name); goto hdrerror; } /* Read controller data. */ buf = g_read_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss, GET32(meta, hdr->cd_length) * ss, &error); if (buf == NULL) goto readerror; meta->cdr = malloc(GET32(meta, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); memcpy(meta->cdr, buf, GET32(meta, hdr->cd_length) * ss); g_free(buf); if (GET32(meta, cdr->Signature) != DDF_CONTROLLER_DATA_SIGNATURE) goto hdrerror; /* Read physical disk records. */ buf = g_read_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss, GET32(meta, hdr->pdr_length) * ss, &error); if (buf == NULL) goto readerror; meta->pdr = malloc(GET32(meta, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK); memcpy(meta->pdr, buf, GET32(meta, hdr->pdr_length) * ss); g_free(buf); if (GET32(meta, pdr->Signature) != DDF_PDR_SIGNATURE) goto hdrerror; /* * Workaround for reading metadata corrupted due to graid bug. * XXX: Remove this before we have disks above 128PB. :) */ if (meta->bigendian) { for (i = 0; i < GET16(meta, pdr->Populated_PDEs); i++) { if (isff(meta->pdr->entry[i].PD_GUID, 24)) continue; if (GET32(meta, pdr->entry[i].PD_Reference) == 0xffffffff) continue; if (GET64(meta, pdr->entry[i].Configured_Size) >= (1ULL << 48)) { SET16(meta, pdr->entry[i].PD_State, GET16(meta, pdr->entry[i].PD_State) & ~DDF_PDE_FAILED); SET64(meta, pdr->entry[i].Configured_Size, GET64(meta, pdr->entry[i].Configured_Size) & ((1ULL << 48) - 1)); } } } /* Read virtual disk records. 
*/ buf = g_read_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss, GET32(meta, hdr->vdr_length) * ss, &error); if (buf == NULL) goto readerror; meta->vdr = malloc(GET32(meta, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK); memcpy(meta->vdr, buf, GET32(meta, hdr->vdr_length) * ss); g_free(buf); if (GET32(meta, vdr->Signature) != DDF_VD_RECORD_SIGNATURE) goto hdrerror; /* Read configuration records. */ buf = g_read_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss, GET32(meta, hdr->cr_length) * ss, &error); if (buf == NULL) goto readerror; meta->cr = malloc(GET32(meta, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK); memcpy(meta->cr, buf, GET32(meta, hdr->cr_length) * ss); g_free(buf); /* Read physical disk data. */ buf = g_read_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss, GET32(meta, hdr->pdd_length) * ss, &error); if (buf == NULL) goto readerror; meta->pdd = malloc(GET32(meta, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK); memcpy(meta->pdd, buf, GET32(meta, hdr->pdd_length) * ss); g_free(buf); if (GET32(meta, pdd->Signature) != DDF_PDD_SIGNATURE) goto hdrerror; i = ddf_meta_find_pd(meta, NULL, GET32(meta, pdd->PD_Reference)); if (i < 0) goto hdrerror; /* Read BBM Log. */ if (GET32(meta, hdr->bbmlog_section) != 0xffffffff && GET32(meta, hdr->bbmlog_length) != 0) { buf = g_read_data(cp, (lba + GET32(meta, hdr->bbmlog_section)) * ss, GET32(meta, hdr->bbmlog_length) * ss, &error); if (buf == NULL) goto readerror; meta->bbm = malloc(GET32(meta, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK); memcpy(meta->bbm, buf, GET32(meta, hdr->bbmlog_length) * ss); g_free(buf); if (GET32(meta, bbm->Signature) != DDF_BBML_SIGNATURE) goto hdrerror; } done: g_free(abuf); if (error != 0) ddf_meta_free(meta); return (error); } static int ddf_meta_write(struct g_consumer *cp, struct ddf_meta *meta) { struct g_provider *pp; struct ddf_vdc_record *vdc; off_t alba, plba, slba, lba; u_int ss, size; int error, i, num; pp = cp->provider; ss = pp->sectorsize; lba = alba = pp->mediasize / ss - 1; plba = GET64(meta, hdr->Primary_Header_LBA); slba = GET64(meta, hdr->Secondary_Header_LBA); next: SET8(meta, hdr->Header_Type, (lba == alba) ? DDF_HEADER_ANCHOR : (lba == plba) ? 
DDF_HEADER_PRIMARY : DDF_HEADER_SECONDARY); SET32(meta, hdr->CRC, 0xffffffff); SET32(meta, hdr->CRC, crc32(meta->hdr, ss)); error = g_write_data(cp, lba * ss, meta->hdr, ss); if (error != 0) { err: G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", pp->name, error); if (lba != alba) goto done; } if (lba == alba) { lba = plba; goto next; } size = GET32(meta, hdr->cd_length) * ss; SET32(meta, cdr->CRC, 0xffffffff); SET32(meta, cdr->CRC, crc32(meta->cdr, size)); error = g_write_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss, meta->cdr, size); if (error != 0) goto err; size = GET32(meta, hdr->pdr_length) * ss; SET32(meta, pdr->CRC, 0xffffffff); SET32(meta, pdr->CRC, crc32(meta->pdr, size)); error = g_write_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss, meta->pdr, size); if (error != 0) goto err; size = GET32(meta, hdr->vdr_length) * ss; SET32(meta, vdr->CRC, 0xffffffff); SET32(meta, vdr->CRC, crc32(meta->vdr, size)); error = g_write_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss, meta->vdr, size); if (error != 0) goto err; size = GET16(meta, hdr->Configuration_Record_Length) * ss; num = GETCRNUM(meta); for (i = 0; i < num; i++) { vdc = GETVDCPTR(meta, i); SET32D(meta, vdc->CRC, 0xffffffff); SET32D(meta, vdc->CRC, crc32(vdc, size)); } error = g_write_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss, meta->cr, size * num); if (error != 0) goto err; size = GET32(meta, hdr->pdd_length) * ss; SET32(meta, pdd->CRC, 0xffffffff); SET32(meta, pdd->CRC, crc32(meta->pdd, size)); error = g_write_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss, meta->pdd, size); if (error != 0) goto err; if (GET32(meta, hdr->bbmlog_length) != 0) { size = GET32(meta, hdr->bbmlog_length) * ss; SET32(meta, bbm->CRC, 0xffffffff); SET32(meta, bbm->CRC, crc32(meta->bbm, size)); error = g_write_data(cp, (lba + GET32(meta, hdr->bbmlog_section)) * ss, meta->bbm, size); if (error != 0) goto err; } done: if (lba == plba && slba != -1) { lba = slba; goto next; } return (error); } static int ddf_meta_erase(struct g_consumer *cp) { struct g_provider *pp; char *buf; int error; pp = cp->provider; buf = malloc(pp->sectorsize, M_MD_DDF, M_WAITOK | M_ZERO); error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", pp->name, error); } free(buf, M_MD_DDF); return (error); } static struct g_raid_volume * g_raid_md_ddf_get_volume(struct g_raid_softc *sc, uint8_t *GUID) { struct g_raid_volume *vol; struct g_raid_md_ddf_pervolume *pv; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { pv = vol->v_md_data; if (memcmp(pv->pv_meta.vde->VD_GUID, GUID, 24) == 0) break; } return (vol); } static struct g_raid_disk * g_raid_md_ddf_get_disk(struct g_raid_softc *sc, uint8_t *GUID, uint32_t id) { struct g_raid_disk *disk; struct g_raid_md_ddf_perdisk *pd; struct ddf_meta *meta; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; meta = &pd->pd_meta; if (GUID != NULL) { if (memcmp(meta->pdd->PD_GUID, GUID, 24) == 0) break; } else { if (GET32(meta, pdd->PD_Reference) == id) break; } } return (disk); } static int g_raid_md_ddf_purge_volumes(struct g_raid_softc *sc) { struct g_raid_volume *vol, *tvol; int i, res; res = 0; TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) { if (vol->v_stopping) continue; for (i = 0; i < vol->v_disks_count; i++) { if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE) break; } if (i >= vol->v_disks_count) { g_raid_destroy_volume(vol); res = 1; } } return 
(res); } static int g_raid_md_ddf_purge_disks(struct g_raid_softc *sc) { #if 0 struct g_raid_disk *disk, *tdisk; struct g_raid_volume *vol; struct g_raid_md_ddf_perdisk *pd; int i, j, res; res = 0; TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { if (disk->d_state == G_RAID_DISK_S_SPARE) continue; pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; /* Scan for deleted volumes. */ for (i = 0; i < pd->pd_subdisks; ) { vol = g_raid_md_ddf_get_volume(sc, pd->pd_meta[i]->volume_id); if (vol != NULL && !vol->v_stopping) { i++; continue; } free(pd->pd_meta[i], M_MD_DDF); for (j = i; j < pd->pd_subdisks - 1; j++) pd->pd_meta[j] = pd->pd_meta[j + 1]; pd->pd_meta[DDF_MAX_SUBDISKS - 1] = NULL; pd->pd_subdisks--; pd->pd_updated = 1; } /* If there is no metadata left - erase and delete disk. */ if (pd->pd_subdisks == 0) { ddf_meta_erase(disk->d_consumer); g_raid_destroy_disk(disk); res = 1; } } return (res); #endif return (0); } static int g_raid_md_ddf_supported(int level, int qual, int disks, int force) { if (disks > DDF_MAX_DISKS_HARD) return (0); switch (level) { case G_RAID_VOLUME_RL_RAID0: if (qual != G_RAID_VOLUME_RLQ_NONE) return (0); if (disks < 1) return (0); if (!force && disks < 2) return (0); break; case G_RAID_VOLUME_RL_RAID1: if (disks < 1) return (0); if (qual == G_RAID_VOLUME_RLQ_R1SM) { if (!force && disks != 2) return (0); } else if (qual == G_RAID_VOLUME_RLQ_R1MM) { if (!force && disks != 3) return (0); } else return (0); break; case G_RAID_VOLUME_RL_RAID3: if (qual != G_RAID_VOLUME_RLQ_R3P0 && qual != G_RAID_VOLUME_RLQ_R3PN) return (0); if (disks < 3) return (0); break; case G_RAID_VOLUME_RL_RAID4: if (qual != G_RAID_VOLUME_RLQ_R4P0 && qual != G_RAID_VOLUME_RLQ_R4PN) return (0); if (disks < 3) return (0); break; case G_RAID_VOLUME_RL_RAID5: if (qual != G_RAID_VOLUME_RLQ_R5RA && qual != G_RAID_VOLUME_RLQ_R5RS && qual != G_RAID_VOLUME_RLQ_R5LA && qual != G_RAID_VOLUME_RLQ_R5LS) return (0); if (disks < 3) return (0); break; case G_RAID_VOLUME_RL_RAID6: if (qual != G_RAID_VOLUME_RLQ_R6RA && qual != G_RAID_VOLUME_RLQ_R6RS && qual != G_RAID_VOLUME_RLQ_R6LA && qual != G_RAID_VOLUME_RLQ_R6LS) return (0); if (disks < 4) return (0); break; case G_RAID_VOLUME_RL_RAIDMDF: if (qual != G_RAID_VOLUME_RLQ_RMDFRA && qual != G_RAID_VOLUME_RLQ_RMDFRS && qual != G_RAID_VOLUME_RLQ_RMDFLA && qual != G_RAID_VOLUME_RLQ_RMDFLS) return (0); if (disks < 4) return (0); break; case G_RAID_VOLUME_RL_RAID1E: if (qual != G_RAID_VOLUME_RLQ_R1EA && qual != G_RAID_VOLUME_RLQ_R1EO) return (0); if (disks < 3) return (0); break; case G_RAID_VOLUME_RL_SINGLE: if (qual != G_RAID_VOLUME_RLQ_NONE) return (0); if (disks != 1) return (0); break; case G_RAID_VOLUME_RL_CONCAT: if (qual != G_RAID_VOLUME_RLQ_NONE) return (0); if (disks < 2) return (0); break; case G_RAID_VOLUME_RL_RAID5E: if (qual != G_RAID_VOLUME_RLQ_R5ERA && qual != G_RAID_VOLUME_RLQ_R5ERS && qual != G_RAID_VOLUME_RLQ_R5ELA && qual != G_RAID_VOLUME_RLQ_R5ELS) return (0); if (disks < 4) return (0); break; case G_RAID_VOLUME_RL_RAID5EE: if (qual != G_RAID_VOLUME_RLQ_R5EERA && qual != G_RAID_VOLUME_RLQ_R5EERS && qual != G_RAID_VOLUME_RLQ_R5EELA && qual != G_RAID_VOLUME_RLQ_R5EELS) return (0); if (disks < 4) return (0); break; case G_RAID_VOLUME_RL_RAID5R: if (qual != G_RAID_VOLUME_RLQ_R5RRA && qual != G_RAID_VOLUME_RLQ_R5RRS && qual != G_RAID_VOLUME_RLQ_R5RLA && qual != G_RAID_VOLUME_RLQ_R5RLS) return (0); if (disks < 3) return (0); break; default: return (0); } return (1); } static int g_raid_md_ddf_start_disk(struct g_raid_disk *disk, struct 
g_raid_volume *vol) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; struct g_raid_md_ddf_perdisk *pd; struct g_raid_md_ddf_pervolume *pv; struct g_raid_md_ddf_object *mdi; struct ddf_vol_meta *vmeta; struct ddf_meta *pdmeta, *gmeta; struct ddf_vdc_record *vdc1; struct ddf_sa_record *sa; off_t size, eoff = 0, esize = 0; uint64_t *val2; int disk_pos, md_disk_bvd = -1, md_disk_pos = -1, md_pde_pos; int i, resurrection = 0; uint32_t reference; sc = disk->d_softc; mdi = (struct g_raid_md_ddf_object *)sc->sc_md; pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; pdmeta = &pd->pd_meta; reference = GET32(&pd->pd_meta, pdd->PD_Reference); pv = vol->v_md_data; vmeta = &pv->pv_meta; gmeta = &mdi->mdio_meta; /* Find disk position in metadata by its reference. */ disk_pos = ddf_meta_find_disk(vmeta, reference, &md_disk_bvd, &md_disk_pos); md_pde_pos = ddf_meta_find_pd(gmeta, NULL, reference); if (disk_pos < 0) { G_RAID_DEBUG1(1, sc, "Disk %s is not a present part of the volume %s", g_raid_get_diskname(disk), vol->v_name); /* Failed stale disk is useless for us. */ if ((GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) != 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED); return (0); } /* If disk has some metadata for this volume - erase. */ if ((vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) SET32D(pdmeta, vdc1->Signature, 0xffffffff); /* If we are in the start process, that's all for now. */ if (!pv->pv_started) goto nofit; /* * If we have already started - try to get use of the disk. * Try to replace OFFLINE disks first, then FAILED. */ if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >= GET16(&pd->pd_meta, hdr->Max_Partitions)) { G_RAID_DEBUG1(1, sc, "No free partitions on disk %s", g_raid_get_diskname(disk)); goto nofit; } ddf_meta_unused_range(&pd->pd_meta, &eoff, &esize); if (esize == 0) { G_RAID_DEBUG1(1, sc, "No free space on disk %s", g_raid_get_diskname(disk)); goto nofit; } eoff *= pd->pd_meta.sectorsize; esize *= pd->pd_meta.sectorsize; size = INT64_MAX; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state != G_RAID_SUBDISK_S_NONE) size = sd->sd_size; if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED && (disk_pos < 0 || vol->v_subdisks[i].sd_state < sd->sd_state)) disk_pos = i; } if (disk_pos >= 0 && vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT && esize < size) { G_RAID_DEBUG1(1, sc, "Disk %s free space " "is too small (%ju < %ju)", g_raid_get_diskname(disk), esize, size); disk_pos = -1; } if (disk_pos >= 0) { if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT) esize = size; md_disk_bvd = disk_pos / GET16(vmeta, vdc->Primary_Element_Count); // XXX md_disk_pos = disk_pos % GET16(vmeta, vdc->Primary_Element_Count); // XXX } else { nofit: if (disk->d_state == G_RAID_DISK_S_NONE) g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); return (0); } /* * If spare is committable, delete spare record. * Othersize, mark it active and leave there. 
*/ sa = ddf_meta_find_sa(&pd->pd_meta, 0); if (sa != NULL) { if ((GET8D(&pd->pd_meta, sa->Spare_Type) & DDF_SAR_TYPE_REVERTIBLE) == 0) { SET32D(&pd->pd_meta, sa->Signature, 0xffffffff); } else { SET8D(&pd->pd_meta, sa->Spare_Type, GET8D(&pd->pd_meta, sa->Spare_Type) | DDF_SAR_TYPE_ACTIVE); } } G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s", g_raid_get_diskname(disk), disk_pos, vol->v_name); resurrection = 1; } sd = &vol->v_subdisks[disk_pos]; if (resurrection && sd->sd_disk != NULL) { g_raid_change_disk_state(sd->sd_disk, G_RAID_DISK_S_STALE_FAILED); TAILQ_REMOVE(&sd->sd_disk->d_subdisks, sd, sd_next); } vol->v_subdisks[disk_pos].sd_disk = disk; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); /* Welcome the new disk. */ if (resurrection) g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); else if (GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); else g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); if (resurrection) { sd->sd_offset = eoff; sd->sd_size = esize; } else if (pdmeta->cr != NULL && (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) { val2 = (uint64_t *)&(vdc1->Physical_Disk_Sequence[GET16(vmeta, hdr->Max_Primary_Element_Entries)]); sd->sd_offset = (off_t)GET64P(pdmeta, val2 + md_disk_pos) * 512; sd->sd_size = (off_t)GET64D(pdmeta, vdc1->Block_Count) * 512; } if (resurrection) { /* Stale disk, almost same as new. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) { /* Failed disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); } else if ((GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & (DDF_PDE_FAILED | DDF_PDE_REBUILD)) != 0) { /* Rebuilding disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_REBUILD); sd->sd_rebuild_pos = 0; } else if ((GET8(vmeta, vde->VD_State) & DDF_VDE_DIRTY) != 0 || (GET8(vmeta, vde->Init_State) & DDF_VDE_INIT_MASK) != DDF_VDE_INIT_FULL) { /* Stale disk or dirty volume (unclean shutdown). */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_STALE); } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); return (resurrection); } static void g_raid_md_ddf_refill(struct g_raid_softc *sc) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_object *md; struct g_raid_md_ddf_perdisk *pd; struct g_raid_md_ddf_pervolume *pv; int update, updated, i, bad; md = sc->sc_md; restart: updated = 0; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { pv = vol->v_md_data; if (!pv->pv_started || vol->v_stopping) continue; /* Search for subdisk that needs replacement. */ bad = 0; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state == G_RAID_SUBDISK_S_NONE || sd->sd_state == G_RAID_SUBDISK_S_FAILED) bad = 1; } if (!bad) continue; G_RAID_DEBUG1(1, sc, "Volume %s is not complete, " "trying to refill.", vol->v_name); TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { /* Skip failed. */ if (disk->d_state < G_RAID_DISK_S_SPARE) continue; /* Skip already used by this volume. */ for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_disk == disk) break; } if (i < vol->v_disks_count) continue; /* Try to use disk if it has empty extents. 
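	 * A disk qualifies only while its DDF metadata still has a free
	 * partition (configuration record) slot.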
*/ pd = disk->d_md_data; if (ddf_meta_count_vdc(&pd->pd_meta, NULL) < GET16(&pd->pd_meta, hdr->Max_Partitions)) { update = g_raid_md_ddf_start_disk(disk, vol); } else update = 0; if (update) { updated = 1; g_raid_md_write_ddf(md, vol, NULL, disk); break; } } } if (updated) goto restart; } static void g_raid_md_ddf_start(struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_object *md; struct g_raid_md_ddf_perdisk *pd; struct g_raid_md_ddf_pervolume *pv; struct g_raid_md_ddf_object *mdi; struct ddf_vol_meta *vmeta; uint64_t *val2; int i, j, bvd; sc = vol->v_softc; md = sc->sc_md; mdi = (struct g_raid_md_ddf_object *)md; pv = vol->v_md_data; vmeta = &pv->pv_meta; vol->v_raid_level = GET8(vmeta, vdc->Primary_RAID_Level); vol->v_raid_level_qualifier = GET8(vmeta, vdc->RLQ); if (GET8(vmeta, vdc->Secondary_Element_Count) > 1 && vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 && GET8(vmeta, vdc->Secondary_RAID_Level) == 0) vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; vol->v_sectorsize = GET16(vmeta, vdc->Block_Size); if (vol->v_sectorsize == 0xffff) vol->v_sectorsize = vmeta->sectorsize; vol->v_strip_size = vol->v_sectorsize << GET8(vmeta, vdc->Stripe_Size); vol->v_disks_count = GET16(vmeta, vdc->Primary_Element_Count) * GET8(vmeta, vdc->Secondary_Element_Count); vol->v_mdf_pdisks = GET8(vmeta, vdc->MDF_Parity_Disks); vol->v_mdf_polynomial = GET16(vmeta, vdc->MDF_Parity_Generator_Polynomial); vol->v_mdf_method = GET8(vmeta, vdc->MDF_Constant_Generation_Method); if (GET8(vmeta, vdc->Rotate_Parity_count) > 31) vol->v_rotate_parity = 1; else vol->v_rotate_parity = 1 << GET8(vmeta, vdc->Rotate_Parity_count); vol->v_mediasize = GET64(vmeta, vdc->VD_Size) * vol->v_sectorsize; for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) { if (j == GET16(vmeta, vdc->Primary_Element_Count)) { j = 0; bvd++; } sd = &vol->v_subdisks[i]; if (vmeta->bvdc[bvd] == NULL) { sd->sd_offset = 0; sd->sd_size = GET64(vmeta, vdc->Block_Count) * vol->v_sectorsize; continue; } val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[ GET16(vmeta, hdr->Max_Primary_Element_Entries)]); sd->sd_offset = GET64P(vmeta, val2 + j) * vol->v_sectorsize; sd->sd_size = GET64(vmeta, bvdc[bvd]->Block_Count) * vol->v_sectorsize; } g_raid_start_volume(vol); /* Make all disks found till the moment take their places. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; if (ddf_meta_find_vdc(&pd->pd_meta, vmeta->vdc->VD_GUID) != NULL) g_raid_md_ddf_start_disk(disk, vol); } pv->pv_started = 1; mdi->mdio_starting--; callout_stop(&pv->pv_start_co); G_RAID_DEBUG1(0, sc, "Volume started."); g_raid_md_write_ddf(md, vol, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. 
*/ g_raid_md_ddf_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); } static void g_raid_ddf_go(void *arg) { struct g_raid_volume *vol; struct g_raid_softc *sc; struct g_raid_md_ddf_pervolume *pv; vol = arg; pv = vol->v_md_data; sc = vol->v_softc; if (!pv->pv_started) { G_RAID_DEBUG1(0, sc, "Force volume start due to timeout."); g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD, G_RAID_EVENT_VOLUME); } } static void g_raid_md_ddf_new_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_ddf_perdisk *pd; struct g_raid_md_ddf_pervolume *pv; struct g_raid_md_ddf_object *mdi; struct g_raid_volume *vol; struct ddf_meta *pdmeta; struct ddf_vol_meta *vmeta; struct ddf_vdc_record *vdc; struct ddf_vd_entry *vde; int i, j, k, num, have, need, cnt, spare; uint32_t val; char buf[17]; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_ddf_object *)md; pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; pdmeta = &pd->pd_meta; spare = -1; if (mdi->mdio_meta.hdr == NULL) ddf_meta_copy(&mdi->mdio_meta, pdmeta); else ddf_meta_update(&mdi->mdio_meta, pdmeta); num = GETCRNUM(pdmeta); for (j = 0; j < num; j++) { vdc = GETVDCPTR(pdmeta, j); val = GET32D(pdmeta, vdc->Signature); if (val == DDF_SA_SIGNATURE && spare == -1) spare = 1; if (val != DDF_VDCR_SIGNATURE) continue; spare = 0; k = ddf_meta_find_vd(pdmeta, vdc->VD_GUID); if (k < 0) continue; vde = &pdmeta->vdr->entry[k]; /* Look for volume with matching ID. */ vol = g_raid_md_ddf_get_volume(sc, vdc->VD_GUID); if (vol == NULL) { ddf_meta_get_name(pdmeta, k, buf); vol = g_raid_create_volume(sc, buf, GET16D(pdmeta, vde->VD_Number)); pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO); vol->v_md_data = pv; callout_init(&pv->pv_start_co, 1); callout_reset(&pv->pv_start_co, g_raid_start_timeout * hz, g_raid_ddf_go, vol); mdi->mdio_starting++; } else pv = vol->v_md_data; /* If we haven't started yet - check metadata freshness. */ vmeta = &pv->pv_meta; ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID, pv->pv_started); } if (spare == 1) { g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); g_raid_md_ddf_refill(sc); } TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { pv = vol->v_md_data; vmeta = &pv->pv_meta; if (ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID) == NULL) continue; if (pv->pv_started) { if (g_raid_md_ddf_start_disk(disk, vol)) g_raid_md_write_ddf(md, vol, NULL, NULL); continue; } /* If we collected all needed disks - start array. 
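	 * Each secondary element (BVD) contributes Primary_Element_Count
	 * disks to the total number required.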
*/ need = 0; have = 0; for (k = 0; k < GET8(vmeta, vdc->Secondary_Element_Count); k++) { if (vmeta->bvdc[k] == NULL) { need += GET16(vmeta, vdc->Primary_Element_Count); continue; } cnt = GET16(vmeta, bvdc[k]->Primary_Element_Count); need += cnt; for (i = 0; i < cnt; i++) { val = GET32(vmeta, bvdc[k]->Physical_Disk_Sequence[i]); if (g_raid_md_ddf_get_disk(sc, NULL, val) != NULL) have++; } } G_RAID_DEBUG1(1, sc, "Volume %s now has %d of %d disks", vol->v_name, have, need); if (have == need) g_raid_md_ddf_start(vol); } } static int g_raid_md_create_req_ddf(struct g_raid_md_object *md, struct g_class *mp, struct gctl_req *req, struct g_geom **gp) { struct g_geom *geom; struct g_raid_softc *sc; struct g_raid_md_ddf_object *mdi, *mdi1; char name[16]; const char *fmtopt; int be = 1; mdi = (struct g_raid_md_ddf_object *)md; fmtopt = gctl_get_asciiparam(req, "fmtopt"); if (fmtopt == NULL || strcasecmp(fmtopt, "BE") == 0) be = 1; else if (strcasecmp(fmtopt, "LE") == 0) be = 0; else { gctl_error(req, "Incorrect fmtopt argument."); return (G_RAID_MD_TASTE_FAIL); } /* Search for existing node. */ LIST_FOREACH(geom, &mp->geom, geom) { sc = geom->softc; if (sc == NULL) continue; if (sc->sc_stopping != 0) continue; if (sc->sc_md->mdo_class != md->mdo_class) continue; mdi1 = (struct g_raid_md_ddf_object *)sc->sc_md; if (mdi1->mdio_bigendian != be) continue; break; } if (geom != NULL) { *gp = geom; return (G_RAID_MD_TASTE_EXISTING); } /* Create new one if not found. */ mdi->mdio_bigendian = be; snprintf(name, sizeof(name), "DDF%s", be ? "" : "-LE"); sc = g_raid_create_node(mp, name, md); if (sc == NULL) return (G_RAID_MD_TASTE_FAIL); md->mdo_softc = sc; *gp = sc->sc_geom; return (G_RAID_MD_TASTE_NEW); } static int g_raid_md_taste_ddf(struct g_raid_md_object *md, struct g_class *mp, struct g_consumer *cp, struct g_geom **gp) { struct g_consumer *rcp; struct g_provider *pp; struct g_raid_softc *sc; struct g_raid_disk *disk; struct ddf_meta meta; struct g_raid_md_ddf_perdisk *pd; struct g_raid_md_ddf_object *mdi; struct g_geom *geom; int error, result, be; char name[16]; G_RAID_DEBUG(1, "Tasting DDF on %s", cp->provider->name); mdi = (struct g_raid_md_ddf_object *)md; pp = cp->provider; /* Read metadata from device. */ g_topology_unlock(); bzero(&meta, sizeof(meta)); error = ddf_meta_read(cp, &meta); g_topology_lock(); if (error != 0) return (G_RAID_MD_TASTE_FAIL); be = meta.bigendian; /* Metadata valid. Print it. */ g_raid_md_ddf_print(&meta); /* Search for matching node. */ sc = NULL; LIST_FOREACH(geom, &mp->geom, geom) { sc = geom->softc; if (sc == NULL) continue; if (sc->sc_stopping != 0) continue; if (sc->sc_md->mdo_class != md->mdo_class) continue; mdi = (struct g_raid_md_ddf_object *)sc->sc_md; if (mdi->mdio_bigendian != be) continue; break; } /* Found matching node. */ if (geom != NULL) { G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); result = G_RAID_MD_TASTE_EXISTING; } else { /* Not found matching node -- create one. */ result = G_RAID_MD_TASTE_NEW; mdi->mdio_bigendian = be; snprintf(name, sizeof(name), "DDF%s", be ? "" : "-LE"); sc = g_raid_create_node(mp, name, md); md->mdo_softc = sc; geom = sc->sc_geom; } /* There is no return after this point, so we close passed consumer. 
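 * From here the taste either joins an existing DDF node or the one just
 * created above: the passed consumer's read access is dropped, a new
 * consumer owned by the node's geom is attached to the provider, and the
 * disk built around the metadata read earlier is handed to
 * g_raid_md_ddf_new_disk().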
*/ g_access(cp, -1, 0, 0); rcp = g_new_consumer(geom); rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; g_topology_unlock(); sx_xlock(&sc->sc_lock); pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO); pd->pd_meta = meta; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = rcp; rcp->private = disk; g_raid_get_disk_info(disk); g_raid_md_ddf_new_disk(disk); sx_xunlock(&sc->sc_lock); g_topology_lock(); *gp = geom; return (result); } static int g_raid_md_event_ddf(struct g_raid_md_object *md, struct g_raid_disk *disk, u_int event) { struct g_raid_softc *sc; sc = md->mdo_softc; if (disk == NULL) return (-1); switch (event) { case G_RAID_DISK_E_DISCONNECTED: /* Delete disk. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); g_raid_md_ddf_purge_volumes(sc); /* Write updated metadata to all disks. */ g_raid_md_write_ddf(md, NULL, NULL, NULL); /* Check if anything left. */ if (g_raid_ndisks(sc, -1) == 0) g_raid_destroy_node(sc, 0); else g_raid_md_ddf_refill(sc); return (0); } return (-2); } static int g_raid_md_volume_event_ddf(struct g_raid_md_object *md, struct g_raid_volume *vol, u_int event) { struct g_raid_md_ddf_pervolume *pv; pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; switch (event) { case G_RAID_VOLUME_E_STARTMD: if (!pv->pv_started) g_raid_md_ddf_start(vol); return (0); } return (-2); } static int g_raid_md_ctl_ddf(struct g_raid_md_object *md, struct gctl_req *req) { struct g_raid_softc *sc; struct g_raid_volume *vol, *vol1; struct g_raid_subdisk *sd; struct g_raid_disk *disk, *disks[DDF_MAX_DISKS_HARD]; struct g_raid_md_ddf_perdisk *pd; struct g_raid_md_ddf_pervolume *pv; struct g_raid_md_ddf_object *mdi; struct ddf_sa_record *sa; struct g_consumer *cp; struct g_provider *pp; char arg[16]; const char *nodename, *verb, *volname, *levelname, *diskname; char *tmp; int *nargs, *force; off_t size, sectorsize, strip, offs[DDF_MAX_DISKS_HARD], esize; intmax_t *sizearg, *striparg; int i, numdisks, len, level, qual; int error; sc = md->mdo_softc; mdi = (struct g_raid_md_ddf_object *)md; verb = gctl_get_param(req, "verb", NULL); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); error = 0; if (strcmp(verb, "label") == 0) { if (*nargs < 4) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, "arg1"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } levelname = gctl_get_asciiparam(req, "arg2"); if (levelname == NULL) { gctl_error(req, "No RAID level."); return (-3); } if (g_raid_volume_str2level(levelname, &level, &qual)) { gctl_error(req, "Unknown RAID level '%s'.", levelname); return (-4); } numdisks = *nargs - 3; force = gctl_get_paraml(req, "force", sizeof(*force)); if (!g_raid_md_ddf_supported(level, qual, numdisks, force ? *force : 0)) { gctl_error(req, "Unsupported RAID level " "(0x%02x/0x%02x), or number of disks (%d).", level, qual, numdisks); return (-5); } /* Search for disks, connect them and probe. 
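 * For the "label" request each named provider is either matched against a
 * disk already attached to this node (reusing its unused metadata range)
 * or opened fresh; the usable size is clamped to the smallest free extent
 * or configured size seen, and the volume sector size becomes the largest
 * member sector size.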
*/ size = INT64_MAX; sectorsize = 0; bzero(disks, sizeof(disks)); bzero(offs, sizeof(offs)); for (i = 0; i < numdisks; i++) { snprintf(arg, sizeof(arg), "arg%d", i + 3); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -6; break; } if (strcmp(diskname, "NONE") == 0) continue; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && strcmp(disk->d_consumer->provider->name, diskname) == 0) break; } if (disk != NULL) { if (disk->d_state != G_RAID_DISK_S_ACTIVE) { gctl_error(req, "Disk '%s' is in a " "wrong state (%s).", diskname, g_raid_disk_state2str(disk->d_state)); error = -7; break; } pd = disk->d_md_data; if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >= GET16(&pd->pd_meta, hdr->Max_Partitions)) { gctl_error(req, "No free partitions " "on disk '%s'.", diskname); error = -7; break; } pp = disk->d_consumer->provider; disks[i] = disk; ddf_meta_unused_range(&pd->pd_meta, &offs[i], &esize); offs[i] *= pp->sectorsize; size = MIN(size, (off_t)esize * pp->sectorsize); sectorsize = MAX(sectorsize, pp->sectorsize); continue; } g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -8; break; } pp = cp->provider; pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO); disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = cp; disks[i] = disk; cp->private = disk; ddf_meta_create(disk, &mdi->mdio_meta); if (mdi->mdio_meta.hdr == NULL) ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta); else ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta); g_topology_unlock(); g_raid_get_disk_info(disk); /* Reserve some space for metadata. */ size = MIN(size, GET64(&pd->pd_meta, pdr->entry[0].Configured_Size) * pp->sectorsize); sectorsize = MAX(sectorsize, pp->sectorsize); } if (error != 0) { for (i = 0; i < numdisks; i++) { if (disks[i] != NULL && disks[i]->d_state == G_RAID_DISK_S_NONE) g_raid_destroy_disk(disks[i]); } return (error); } if (sectorsize <= 0) { gctl_error(req, "Can't get sector size."); return (-8); } /* Handle size argument. */ len = sizeof(*sizearg); sizearg = gctl_get_param(req, "size", &len); if (sizearg != NULL && len == sizeof(*sizearg) && *sizearg > 0) { if (*sizearg > size) { gctl_error(req, "Size too big %lld > %lld.", (long long)*sizearg, (long long)size); return (-9); } size = *sizearg; } /* Handle strip argument. */ strip = 131072; len = sizeof(*striparg); striparg = gctl_get_param(req, "strip", &len); if (striparg != NULL && len == sizeof(*striparg) && *striparg > 0) { if (*striparg < sectorsize) { gctl_error(req, "Strip size too small."); return (-10); } if (*striparg % sectorsize != 0) { gctl_error(req, "Incorrect strip size."); return (-11); } strip = *striparg; } /* Round size down to strip or sector. */ if (level == G_RAID_VOLUME_RL_RAID1 || level == G_RAID_VOLUME_RL_RAID3 || level == G_RAID_VOLUME_RL_SINGLE || level == G_RAID_VOLUME_RL_CONCAT) size -= (size % sectorsize); else if (level == G_RAID_VOLUME_RL_RAID1E && (numdisks & 1) != 0) size -= (size % (2 * strip)); else size -= (size % strip); if (size <= 0) { gctl_error(req, "Size too small."); return (-13); } /* We have all we need, create things: volume, ... 
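 * The media size set below is per-level arithmetic: N*size for
 * RAID0/CONCAT/SINGLE, size for RAID1, (N-1)*size for the single-parity
 * levels, (N-2)*size for RAID6/RAID5E/RAID5EE, N minus the MDF parity
 * disk count for RAIDMDF, and half the total rounded down to a whole
 * strip for RAID1E.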
*/ pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO); ddf_vol_meta_create(&pv->pv_meta, &mdi->mdio_meta); pv->pv_started = 1; vol = g_raid_create_volume(sc, volname, -1); vol->v_md_data = pv; vol->v_raid_level = level; vol->v_raid_level_qualifier = qual; vol->v_strip_size = strip; vol->v_disks_count = numdisks; if (level == G_RAID_VOLUME_RL_RAID0 || level == G_RAID_VOLUME_RL_CONCAT || level == G_RAID_VOLUME_RL_SINGLE) vol->v_mediasize = size * numdisks; else if (level == G_RAID_VOLUME_RL_RAID1) vol->v_mediasize = size; else if (level == G_RAID_VOLUME_RL_RAID3 || level == G_RAID_VOLUME_RL_RAID4 || level == G_RAID_VOLUME_RL_RAID5) vol->v_mediasize = size * (numdisks - 1); else if (level == G_RAID_VOLUME_RL_RAID5R) { vol->v_mediasize = size * (numdisks - 1); vol->v_rotate_parity = 1024; } else if (level == G_RAID_VOLUME_RL_RAID6 || level == G_RAID_VOLUME_RL_RAID5E || level == G_RAID_VOLUME_RL_RAID5EE) vol->v_mediasize = size * (numdisks - 2); else if (level == G_RAID_VOLUME_RL_RAIDMDF) { if (numdisks < 5) vol->v_mdf_pdisks = 2; else vol->v_mdf_pdisks = 3; vol->v_mdf_polynomial = 0x11d; vol->v_mdf_method = 0x00; vol->v_mediasize = size * (numdisks - vol->v_mdf_pdisks); } else { /* RAID1E */ vol->v_mediasize = ((size * numdisks) / strip / 2) * strip; } vol->v_sectorsize = sectorsize; g_raid_start_volume(vol); /* , and subdisks. */ for (i = 0; i < numdisks; i++) { disk = disks[i]; sd = &vol->v_subdisks[i]; sd->sd_disk = disk; sd->sd_offset = offs[i]; sd->sd_size = size; if (disk == NULL) continue; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } /* Write metadata based on created entities. */ G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_ddf(md, vol, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_ddf_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); return (0); } if (strcmp(verb, "add") == 0) { gctl_error(req, "`add` command is not applicable, " "use `label` instead."); return (-99); } if (strcmp(verb, "delete") == 0) { nodename = gctl_get_asciiparam(req, "arg0"); if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0) nodename = NULL; /* Full node destruction. */ if (*nargs == 1 && nodename != NULL) { /* Check if some volume is still open. */ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && g_raid_nopens(sc) != 0) { gctl_error(req, "Some volume is still open."); return (-4); } TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) ddf_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); return (0); } /* Destroy specified volume. If it was last - all node. */ if (*nargs > 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, nodename != NULL ? "arg1" : "arg0"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } /* Search for volume. 
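 * The volume named in the request is looked up first by its g_raid name,
 * then by provider name (with or without the "raid/" prefix), and finally,
 * if the argument is purely numeric, by the volume's global ID.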
*/ TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (strcmp(vol->v_name, volname) == 0) break; pp = vol->v_provider; if (pp == NULL) continue; if (strcmp(pp->name, volname) == 0) break; if (strncmp(pp->name, "raid/", 5) == 0 && strcmp(pp->name + 5, volname) == 0) break; } if (vol == NULL) { i = strtol(volname, &tmp, 10); if (verb != volname && tmp[0] == 0) { TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (vol->v_global_id == i) break; } } } if (vol == NULL) { gctl_error(req, "Volume '%s' not found.", volname); return (-3); } /* Check if volume is still open. */ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && vol->v_provider_open != 0) { gctl_error(req, "Volume is still open."); return (-4); } /* Destroy volume and potentially node. */ i = 0; TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next) i++; if (i >= 2) { g_raid_destroy_volume(vol); g_raid_md_ddf_purge_disks(sc); g_raid_md_write_ddf(md, NULL, NULL, NULL); } else { TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) ddf_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); } return (0); } if (strcmp(verb, "remove") == 0 || strcmp(verb, "fail") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } for (i = 1; i < *nargs; i++) { snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -2; break; } if (strncmp(diskname, "/dev/", 5) == 0) diskname += 5; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && strcmp(disk->d_consumer->provider->name, diskname) == 0) break; } if (disk == NULL) { gctl_error(req, "Disk '%s' not found.", diskname); error = -3; break; } if (strcmp(verb, "fail") == 0) { g_raid_md_fail_disk_ddf(md, NULL, disk); continue; } /* Erase metadata on deleting disk and destroy it. */ ddf_meta_erase(disk->d_consumer); g_raid_destroy_disk(disk); } g_raid_md_ddf_purge_volumes(sc); /* Write updated metadata to remaining disks. */ g_raid_md_write_ddf(md, NULL, NULL, NULL); /* Check if anything left. */ if (g_raid_ndisks(sc, -1) == 0) g_raid_destroy_node(sc, 0); else g_raid_md_ddf_refill(sc); return (error); } if (strcmp(verb, "insert") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } for (i = 1; i < *nargs; i++) { /* Get disk name. */ snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -3; break; } /* Try to find provider with specified name. */ g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -4; break; } pp = cp->provider; g_topology_unlock(); pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO); disk = g_raid_create_disk(sc); disk->d_consumer = cp; disk->d_md_data = (void *)pd; cp->private = disk; g_raid_get_disk_info(disk); /* Welcome the "new" disk. 
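 * A freshly inserted disk is marked SPARE and gets new DDF metadata with a
 * Spare Assignment Record sized to fit the configuration record length, so
 * the following refill pass can use it to replace missing array members.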
*/ g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); ddf_meta_create(disk, &mdi->mdio_meta); sa = ddf_meta_find_sa(&pd->pd_meta, 1); if (sa != NULL) { SET32D(&pd->pd_meta, sa->Signature, DDF_SA_SIGNATURE); SET8D(&pd->pd_meta, sa->Spare_Type, 0); SET16D(&pd->pd_meta, sa->Populated_SAEs, 0); SET16D(&pd->pd_meta, sa->MAX_SAE_Supported, (GET16(&pd->pd_meta, hdr->Configuration_Record_Length) * pd->pd_meta.sectorsize - sizeof(struct ddf_sa_record)) / sizeof(struct ddf_sa_entry)); } if (mdi->mdio_meta.hdr == NULL) ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta); else ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta); g_raid_md_write_ddf(md, NULL, NULL, NULL); g_raid_md_ddf_refill(sc); } return (error); } return (-100); } static int g_raid_md_write_ddf(struct g_raid_md_object *md, struct g_raid_volume *tvol, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_ddf_perdisk *pd; struct g_raid_md_ddf_pervolume *pv; struct g_raid_md_ddf_object *mdi; struct ddf_meta *gmeta; struct ddf_vol_meta *vmeta; struct ddf_vdc_record *vdc; struct ddf_sa_record *sa; uint64_t *val2; int i, j, pos, bvd, size; sc = md->mdo_softc; mdi = (struct g_raid_md_ddf_object *)md; gmeta = &mdi->mdio_meta; if (sc->sc_stopping == G_RAID_DESTROY_HARD) return (0); /* * Clear disk flags to let only really needed ones to be reset. * Do it only if there are no volumes in starting state now, * as they can update disk statuses yet and we may kill innocent. */ if (mdi->mdio_starting == 0) { for (i = 0; i < GET16(gmeta, pdr->Populated_PDEs); i++) { if (isff(gmeta->pdr->entry[i].PD_GUID, 24)) continue; SET16(gmeta, pdr->entry[i].PD_Type, GET16(gmeta, pdr->entry[i].PD_Type) & ~(DDF_PDE_PARTICIPATING | DDF_PDE_GLOBAL_SPARE | DDF_PDE_CONFIG_SPARE)); if ((GET16(gmeta, pdr->entry[i].PD_State) & DDF_PDE_PFA) == 0) SET16(gmeta, pdr->entry[i].PD_State, 0); } } /* Generate/update new per-volume metadata. 
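 * For every started volume the VDC sequence number is bumped and the level,
 * stripe size, element counts and VD state are regenerated from the
 * in-memory volume, then the per-BVD configuration records and the global
 * physical disk states are refreshed from the current subdisk states.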
*/ TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; if (vol->v_stopping || !pv->pv_started) continue; vmeta = &pv->pv_meta; SET32(vmeta, vdc->Sequence_Number, GET32(vmeta, vdc->Sequence_Number) + 1); if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E && vol->v_disks_count % 2 == 0) SET16(vmeta, vdc->Primary_Element_Count, 2); else SET16(vmeta, vdc->Primary_Element_Count, vol->v_disks_count); SET8(vmeta, vdc->Stripe_Size, ffs(vol->v_strip_size / vol->v_sectorsize) - 1); if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E && vol->v_disks_count % 2 == 0) { SET8(vmeta, vdc->Primary_RAID_Level, DDF_VDCR_RAID1); SET8(vmeta, vdc->RLQ, 0); SET8(vmeta, vdc->Secondary_Element_Count, vol->v_disks_count / 2); SET8(vmeta, vdc->Secondary_RAID_Level, 0); } else { SET8(vmeta, vdc->Primary_RAID_Level, vol->v_raid_level); SET8(vmeta, vdc->RLQ, vol->v_raid_level_qualifier); SET8(vmeta, vdc->Secondary_Element_Count, 1); SET8(vmeta, vdc->Secondary_RAID_Level, 0); } SET8(vmeta, vdc->Secondary_Element_Seq, 0); SET64(vmeta, vdc->Block_Count, 0); SET64(vmeta, vdc->VD_Size, vol->v_mediasize / vol->v_sectorsize); SET16(vmeta, vdc->Block_Size, vol->v_sectorsize); SET8(vmeta, vdc->Rotate_Parity_count, fls(vol->v_rotate_parity) - 1); SET8(vmeta, vdc->MDF_Parity_Disks, vol->v_mdf_pdisks); SET16(vmeta, vdc->MDF_Parity_Generator_Polynomial, vol->v_mdf_polynomial); SET8(vmeta, vdc->MDF_Constant_Generation_Method, vol->v_mdf_method); SET16(vmeta, vde->VD_Number, vol->v_global_id); if (vol->v_state <= G_RAID_VOLUME_S_BROKEN) SET8(vmeta, vde->VD_State, DDF_VDE_FAILED); else if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED) SET8(vmeta, vde->VD_State, DDF_VDE_DEGRADED); else if (vol->v_state <= G_RAID_VOLUME_S_SUBOPTIMAL) SET8(vmeta, vde->VD_State, DDF_VDE_PARTIAL); else SET8(vmeta, vde->VD_State, DDF_VDE_OPTIMAL); if (vol->v_dirty || g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) > 0 || g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) > 0) SET8(vmeta, vde->VD_State, GET8(vmeta, vde->VD_State) | DDF_VDE_DIRTY); SET8(vmeta, vde->Init_State, DDF_VDE_INIT_FULL); // XXX ddf_meta_put_name(vmeta, vol->v_name); for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; bvd = i / GET16(vmeta, vdc->Primary_Element_Count); pos = i % GET16(vmeta, vdc->Primary_Element_Count); disk = sd->sd_disk; if (disk != NULL) { pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; if (vmeta->bvdc[bvd] == NULL) { size = GET16(vmeta, hdr->Configuration_Record_Length) * vmeta->sectorsize; vmeta->bvdc[bvd] = malloc(size, M_MD_DDF, M_WAITOK); memset(vmeta->bvdc[bvd], 0xff, size); } memcpy(vmeta->bvdc[bvd], vmeta->vdc, sizeof(struct ddf_vdc_record)); SET8(vmeta, bvdc[bvd]->Secondary_Element_Seq, bvd); SET64(vmeta, bvdc[bvd]->Block_Count, sd->sd_size / vol->v_sectorsize); SET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos], GET32(&pd->pd_meta, pdd->PD_Reference)); val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[ GET16(vmeta, hdr->Max_Primary_Element_Entries)]); SET64P(vmeta, val2 + pos, sd->sd_offset / vol->v_sectorsize); } if (vmeta->bvdc[bvd] == NULL) continue; j = ddf_meta_find_pd(gmeta, NULL, GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos])); if (j < 0) continue; SET16(gmeta, pdr->entry[j].PD_Type, GET16(gmeta, pdr->entry[j].PD_Type) | DDF_PDE_PARTICIPATING); if (sd->sd_state == G_RAID_SUBDISK_S_NONE) SET16(gmeta, pdr->entry[j].PD_State, GET16(gmeta, pdr->entry[j].PD_State) | (DDF_PDE_FAILED | DDF_PDE_MISSING)); else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) SET16(gmeta, 
pdr->entry[j].PD_State, GET16(gmeta, pdr->entry[j].PD_State) | (DDF_PDE_FAILED | DDF_PDE_PFA)); else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD) SET16(gmeta, pdr->entry[j].PD_State, GET16(gmeta, pdr->entry[j].PD_State) | DDF_PDE_REBUILD); else SET16(gmeta, pdr->entry[j].PD_State, GET16(gmeta, pdr->entry[j].PD_State) | DDF_PDE_ONLINE); } } /* Mark spare and failed disks as such. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; i = ddf_meta_find_pd(gmeta, NULL, GET32(&pd->pd_meta, pdd->PD_Reference)); if (i < 0) continue; if (disk->d_state == G_RAID_DISK_S_FAILED) { SET16(gmeta, pdr->entry[i].PD_State, GET16(gmeta, pdr->entry[i].PD_State) | (DDF_PDE_FAILED | DDF_PDE_PFA)); } if (disk->d_state != G_RAID_DISK_S_SPARE) continue; sa = ddf_meta_find_sa(&pd->pd_meta, 0); if (sa == NULL || (GET8D(&pd->pd_meta, sa->Spare_Type) & DDF_SAR_TYPE_DEDICATED) == 0) { SET16(gmeta, pdr->entry[i].PD_Type, GET16(gmeta, pdr->entry[i].PD_Type) | DDF_PDE_GLOBAL_SPARE); } else { SET16(gmeta, pdr->entry[i].PD_Type, GET16(gmeta, pdr->entry[i].PD_Type) | DDF_PDE_CONFIG_SPARE); } SET16(gmeta, pdr->entry[i].PD_State, GET16(gmeta, pdr->entry[i].PD_State) | DDF_PDE_ONLINE); } /* Remove disks without "participating" flag (unused). */ for (i = 0, j = -1; i < GET16(gmeta, pdr->Populated_PDEs); i++) { if (isff(gmeta->pdr->entry[i].PD_GUID, 24)) continue; if ((GET16(gmeta, pdr->entry[i].PD_Type) & (DDF_PDE_PARTICIPATING | DDF_PDE_GLOBAL_SPARE | DDF_PDE_CONFIG_SPARE)) != 0 || g_raid_md_ddf_get_disk(sc, NULL, GET32(gmeta, pdr->entry[i].PD_Reference)) != NULL) j = i; else memset(&gmeta->pdr->entry[i], 0xff, sizeof(struct ddf_pd_entry)); } SET16(gmeta, pdr->Populated_PDEs, j + 1); /* Update per-disk metadata and write them. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; if (disk->d_state != G_RAID_DISK_S_ACTIVE && disk->d_state != G_RAID_DISK_S_SPARE) continue; /* Update PDR. */ memcpy(pd->pd_meta.pdr, gmeta->pdr, GET32(&pd->pd_meta, hdr->pdr_length) * pd->pd_meta.sectorsize); /* Update VDR. */ SET16(&pd->pd_meta, vdr->Populated_VDEs, 0); TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (vol->v_stopping) continue; pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; i = ddf_meta_find_vd(&pd->pd_meta, pv->pv_meta.vde->VD_GUID); if (i < 0) i = ddf_meta_find_vd(&pd->pd_meta, NULL); if (i >= 0) memcpy(&pd->pd_meta.vdr->entry[i], pv->pv_meta.vde, sizeof(struct ddf_vd_entry)); } /* Update VDC. */ if (mdi->mdio_starting == 0) { /* Remove all VDCs to restore needed later. 
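 * Invalidate every configuration-record slot first (signature 0xffffffff),
 * then the loop below copies back one record per subdisk this disk still
 * participates in, so records for departed volumes are not carried over.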
*/ j = GETCRNUM(&pd->pd_meta); for (i = 0; i < j; i++) { vdc = GETVDCPTR(&pd->pd_meta, i); if (GET32D(&pd->pd_meta, vdc->Signature) != DDF_VDCR_SIGNATURE) continue; SET32D(&pd->pd_meta, vdc->Signature, 0xffffffff); } } TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { vol = sd->sd_volume; if (vol->v_stopping) continue; pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; vmeta = &pv->pv_meta; vdc = ddf_meta_find_vdc(&pd->pd_meta, vmeta->vde->VD_GUID); if (vdc == NULL) vdc = ddf_meta_find_vdc(&pd->pd_meta, NULL); if (vdc != NULL) { bvd = sd->sd_pos / GET16(vmeta, vdc->Primary_Element_Count); memcpy(vdc, vmeta->bvdc[bvd], GET16(&pd->pd_meta, hdr->Configuration_Record_Length) * pd->pd_meta.sectorsize); } } G_RAID_DEBUG(1, "Writing DDF metadata to %s", g_raid_get_diskname(disk)); g_raid_md_ddf_print(&pd->pd_meta); ddf_meta_write(disk->d_consumer, &pd->pd_meta); } return (0); } static int g_raid_md_fail_disk_ddf(struct g_raid_md_object *md, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_md_ddf_perdisk *pd; struct g_raid_subdisk *sd; int i; sc = md->mdo_softc; pd = (struct g_raid_md_ddf_perdisk *)tdisk->d_md_data; /* We can't fail disk that is not a part of array now. */ if (tdisk->d_state != G_RAID_DISK_S_ACTIVE) return (-1); /* * Mark disk as failed in metadata and try to write that metadata * to the disk itself to prevent it's later resurrection as STALE. */ G_RAID_DEBUG(1, "Writing DDF metadata to %s", g_raid_get_diskname(tdisk)); i = ddf_meta_find_pd(&pd->pd_meta, NULL, GET32(&pd->pd_meta, pdd->PD_Reference)); SET16(&pd->pd_meta, pdr->entry[i].PD_State, DDF_PDE_FAILED | DDF_PDE_PFA); if (tdisk->d_consumer != NULL) ddf_meta_write(tdisk->d_consumer, &pd->pd_meta); /* Change states. */ g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, G_RAID_EVENT_SUBDISK); } /* Write updated metadata to remaining disks. 
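 * The disk just failed is no longer ACTIVE, so the rewrite below only
 * touches the surviving ACTIVE/SPARE members; the refill pass then looks
 * for a spare to take over the failed slot.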
*/ g_raid_md_write_ddf(md, NULL, NULL, tdisk); g_raid_md_ddf_refill(sc); return (0); } static int g_raid_md_free_disk_ddf(struct g_raid_md_object *md, struct g_raid_disk *disk) { struct g_raid_md_ddf_perdisk *pd; pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; ddf_meta_free(&pd->pd_meta); free(pd, M_MD_DDF); disk->d_md_data = NULL; return (0); } static int g_raid_md_free_volume_ddf(struct g_raid_md_object *md, struct g_raid_volume *vol) { struct g_raid_md_ddf_object *mdi; struct g_raid_md_ddf_pervolume *pv; mdi = (struct g_raid_md_ddf_object *)md; pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data; ddf_vol_meta_free(&pv->pv_meta); if (!pv->pv_started) { pv->pv_started = 1; mdi->mdio_starting--; callout_stop(&pv->pv_start_co); } free(pv, M_MD_DDF); vol->v_md_data = NULL; return (0); } static int g_raid_md_free_ddf(struct g_raid_md_object *md) { struct g_raid_md_ddf_object *mdi; mdi = (struct g_raid_md_ddf_object *)md; if (!mdi->mdio_started) { mdi->mdio_started = 0; callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, md->mdo_softc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } ddf_meta_free(&mdi->mdio_meta); return (0); } G_RAID_MD_DECLARE(ddf, "DDF"); Index: head/sys/geom/raid/md_intel.c =================================================================== --- head/sys/geom/raid/md_intel.c (revision 350693) +++ head/sys/geom/raid/md_intel.c (revision 350694) @@ -1,2717 +1,2718 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * Copyright (c) 2000 - 2008 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_md_if.h" static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata"); struct intel_raid_map { uint32_t offset; uint32_t disk_sectors; uint32_t stripe_count; uint16_t strip_sectors; uint8_t status; #define INTEL_S_READY 0x00 #define INTEL_S_UNINITIALIZED 0x01 #define INTEL_S_DEGRADED 0x02 #define INTEL_S_FAILURE 0x03 uint8_t type; #define INTEL_T_RAID0 0x00 #define INTEL_T_RAID1 0x01 #define INTEL_T_RAID5 0x05 uint8_t total_disks; uint8_t total_domains; uint8_t failed_disk_num; uint8_t ddf; uint32_t offset_hi; uint32_t disk_sectors_hi; uint32_t stripe_count_hi; uint32_t filler_2[4]; uint32_t disk_idx[1]; /* total_disks entries. */ #define INTEL_DI_IDX 0x00ffffff #define INTEL_DI_RBLD 0x01000000 } __packed; struct intel_raid_vol { uint8_t name[16]; u_int64_t total_sectors __packed; uint32_t state; #define INTEL_ST_BOOTABLE 0x00000001 #define INTEL_ST_BOOT_DEVICE 0x00000002 #define INTEL_ST_READ_COALESCING 0x00000004 #define INTEL_ST_WRITE_COALESCING 0x00000008 #define INTEL_ST_LAST_SHUTDOWN_DIRTY 0x00000010 #define INTEL_ST_HIDDEN_AT_BOOT 0x00000020 #define INTEL_ST_CURRENTLY_HIDDEN 0x00000040 #define INTEL_ST_VERIFY_AND_FIX 0x00000080 #define INTEL_ST_MAP_STATE_UNINIT 0x00000100 #define INTEL_ST_NO_AUTO_RECOVERY 0x00000200 #define INTEL_ST_CLONE_N_GO 0x00000400 #define INTEL_ST_CLONE_MAN_SYNC 0x00000800 #define INTEL_ST_CNG_MASTER_DISK_NUM 0x00001000 uint32_t reserved; uint8_t migr_priority; uint8_t num_sub_vols; uint8_t tid; uint8_t cng_master_disk; uint16_t cache_policy; uint8_t cng_state; #define INTEL_CNGST_UPDATED 0 #define INTEL_CNGST_NEEDS_UPDATE 1 #define INTEL_CNGST_MASTER_MISSING 2 uint8_t cng_sub_state; uint32_t filler_0[10]; uint32_t curr_migr_unit; uint32_t checkpoint_id; uint8_t migr_state; uint8_t migr_type; #define INTEL_MT_INIT 0 #define INTEL_MT_REBUILD 1 #define INTEL_MT_VERIFY 2 #define INTEL_MT_GEN_MIGR 3 #define INTEL_MT_STATE_CHANGE 4 #define INTEL_MT_REPAIR 5 uint8_t dirty; uint8_t fs_state; uint16_t verify_errors; uint16_t bad_blocks; uint32_t curr_migr_unit_hi; uint32_t filler_1[3]; struct intel_raid_map map[1]; /* 2 entries if migr_state != 0. */ } __packed; struct intel_raid_disk { #define INTEL_SERIAL_LEN 16 uint8_t serial[INTEL_SERIAL_LEN]; uint32_t sectors; uint32_t id; uint32_t flags; #define INTEL_F_SPARE 0x01 #define INTEL_F_ASSIGNED 0x02 #define INTEL_F_FAILED 0x04 #define INTEL_F_ONLINE 0x08 #define INTEL_F_DISABLED 0x80 uint32_t owner_cfg_num; uint32_t sectors_hi; uint32_t filler[3]; } __packed; struct intel_raid_conf { uint8_t intel_id[24]; #define INTEL_MAGIC "Intel Raid ISM Cfg Sig. 
" uint8_t version[6]; #define INTEL_VERSION_1000 "1.0.00" /* RAID0 */ #define INTEL_VERSION_1100 "1.1.00" /* RAID1 */ #define INTEL_VERSION_1200 "1.2.00" /* Many volumes */ #define INTEL_VERSION_1201 "1.2.01" /* 3 or 4 disks */ #define INTEL_VERSION_1202 "1.2.02" /* RAID5 */ #define INTEL_VERSION_1204 "1.2.04" /* 5 or 6 disks */ #define INTEL_VERSION_1206 "1.2.06" /* CNG */ #define INTEL_VERSION_1300 "1.3.00" /* Attributes */ uint8_t dummy_0[2]; uint32_t checksum; uint32_t config_size; uint32_t config_id; uint32_t generation; uint32_t error_log_size; uint32_t attributes; #define INTEL_ATTR_RAID0 0x00000001 #define INTEL_ATTR_RAID1 0x00000002 #define INTEL_ATTR_RAID10 0x00000004 #define INTEL_ATTR_RAID1E 0x00000008 #define INTEL_ATTR_RAID5 0x00000010 #define INTEL_ATTR_RAIDCNG 0x00000020 #define INTEL_ATTR_EXT_STRIP 0x00000040 #define INTEL_ATTR_NVM_CACHE 0x02000000 #define INTEL_ATTR_2TB_DISK 0x04000000 #define INTEL_ATTR_BBM 0x08000000 #define INTEL_ATTR_NVM_CACHE2 0x10000000 #define INTEL_ATTR_2TB 0x20000000 #define INTEL_ATTR_PM 0x40000000 #define INTEL_ATTR_CHECKSUM 0x80000000 uint8_t total_disks; uint8_t total_volumes; uint8_t error_log_pos; uint8_t dummy_2[1]; uint32_t cache_size; uint32_t orig_config_id; uint32_t pwr_cycle_count; uint32_t bbm_log_size; uint32_t filler_0[35]; struct intel_raid_disk disk[1]; /* total_disks entries. */ /* Here goes total_volumes of struct intel_raid_vol. */ } __packed; #define INTEL_ATTR_SUPPORTED ( INTEL_ATTR_RAID0 | INTEL_ATTR_RAID1 | \ INTEL_ATTR_RAID10 | INTEL_ATTR_RAID1E | INTEL_ATTR_RAID5 | \ INTEL_ATTR_RAIDCNG | INTEL_ATTR_EXT_STRIP | INTEL_ATTR_2TB_DISK | \ INTEL_ATTR_2TB | INTEL_ATTR_PM | INTEL_ATTR_CHECKSUM ) #define INTEL_MAX_MD_SIZE(ndisks) \ (sizeof(struct intel_raid_conf) + \ sizeof(struct intel_raid_disk) * (ndisks - 1) + \ sizeof(struct intel_raid_vol) * 2 + \ sizeof(struct intel_raid_map) * 2 + \ sizeof(uint32_t) * (ndisks - 1) * 4) struct g_raid_md_intel_perdisk { struct intel_raid_conf *pd_meta; int pd_disk_pos; struct intel_raid_disk pd_disk_meta; }; struct g_raid_md_intel_pervolume { int pv_volume_pos; int pv_cng; int pv_cng_man_sync; int pv_cng_master_disk; }; struct g_raid_md_intel_object { struct g_raid_md_object mdio_base; uint32_t mdio_config_id; uint32_t mdio_orig_config_id; uint32_t mdio_generation; struct intel_raid_conf *mdio_meta; struct callout mdio_start_co; /* STARTING state timer. */ int mdio_disks_present; int mdio_started; int mdio_incomplete; struct root_hold_token *mdio_rootmount; /* Root mount delay token. 
*/ }; static g_raid_md_create_t g_raid_md_create_intel; static g_raid_md_taste_t g_raid_md_taste_intel; static g_raid_md_event_t g_raid_md_event_intel; static g_raid_md_ctl_t g_raid_md_ctl_intel; static g_raid_md_write_t g_raid_md_write_intel; static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel; static g_raid_md_free_disk_t g_raid_md_free_disk_intel; static g_raid_md_free_volume_t g_raid_md_free_volume_intel; static g_raid_md_free_t g_raid_md_free_intel; static kobj_method_t g_raid_md_intel_methods[] = { KOBJMETHOD(g_raid_md_create, g_raid_md_create_intel), KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_intel), KOBJMETHOD(g_raid_md_event, g_raid_md_event_intel), KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_intel), KOBJMETHOD(g_raid_md_write, g_raid_md_write_intel), KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_intel), KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_intel), KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_intel), KOBJMETHOD(g_raid_md_free, g_raid_md_free_intel), { 0, 0 } }; static struct g_raid_md_class g_raid_md_intel_class = { "Intel", g_raid_md_intel_methods, sizeof(struct g_raid_md_intel_object), .mdc_enable = 1, .mdc_priority = 100 }; static struct intel_raid_map * intel_get_map(struct intel_raid_vol *mvol, int i) { struct intel_raid_map *mmap; if (i > (mvol->migr_state ? 1 : 0)) return (NULL); mmap = &mvol->map[0]; for (; i > 0; i--) { mmap = (struct intel_raid_map *) &mmap->disk_idx[mmap->total_disks]; } return ((struct intel_raid_map *)mmap); } static struct intel_raid_vol * intel_get_volume(struct intel_raid_conf *meta, int i) { struct intel_raid_vol *mvol; struct intel_raid_map *mmap; if (i > 1) return (NULL); mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks]; for (; i > 0; i--) { mmap = intel_get_map(mvol, mvol->migr_state ? 
1 : 0); mvol = (struct intel_raid_vol *) &mmap->disk_idx[mmap->total_disks]; } return (mvol); } static off_t intel_get_map_offset(struct intel_raid_map *mmap) { off_t offset = (off_t)mmap->offset_hi << 32; offset += mmap->offset; return (offset); } static void intel_set_map_offset(struct intel_raid_map *mmap, off_t offset) { mmap->offset = offset & 0xffffffff; mmap->offset_hi = offset >> 32; } static off_t intel_get_map_disk_sectors(struct intel_raid_map *mmap) { off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32; disk_sectors += mmap->disk_sectors; return (disk_sectors); } static void intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors) { mmap->disk_sectors = disk_sectors & 0xffffffff; mmap->disk_sectors_hi = disk_sectors >> 32; } static void intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count) { mmap->stripe_count = stripe_count & 0xffffffff; mmap->stripe_count_hi = stripe_count >> 32; } static off_t intel_get_disk_sectors(struct intel_raid_disk *disk) { off_t sectors = (off_t)disk->sectors_hi << 32; sectors += disk->sectors; return (sectors); } static void intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors) { disk->sectors = sectors & 0xffffffff; disk->sectors_hi = sectors >> 32; } static off_t intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol) { off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32; curr_migr_unit += vol->curr_migr_unit; return (curr_migr_unit); } static void intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit) { vol->curr_migr_unit = curr_migr_unit & 0xffffffff; vol->curr_migr_unit_hi = curr_migr_unit >> 32; } static char * intel_status2str(int status) { switch (status) { case INTEL_S_READY: return ("READY"); case INTEL_S_UNINITIALIZED: return ("UNINITIALIZED"); case INTEL_S_DEGRADED: return ("DEGRADED"); case INTEL_S_FAILURE: return ("FAILURE"); default: return ("UNKNOWN"); } } static char * intel_type2str(int type) { switch (type) { case INTEL_T_RAID0: return ("RAID0"); case INTEL_T_RAID1: return ("RAID1"); case INTEL_T_RAID5: return ("RAID5"); default: return ("UNKNOWN"); } } static char * intel_cngst2str(int cng_state) { switch (cng_state) { case INTEL_CNGST_UPDATED: return ("UPDATED"); case INTEL_CNGST_NEEDS_UPDATE: return ("NEEDS_UPDATE"); case INTEL_CNGST_MASTER_MISSING: return ("MASTER_MISSING"); default: return ("UNKNOWN"); } } static char * intel_mt2str(int type) { switch (type) { case INTEL_MT_INIT: return ("INIT"); case INTEL_MT_REBUILD: return ("REBUILD"); case INTEL_MT_VERIFY: return ("VERIFY"); case INTEL_MT_GEN_MIGR: return ("GEN_MIGR"); case INTEL_MT_STATE_CHANGE: return ("STATE_CHANGE"); case INTEL_MT_REPAIR: return ("REPAIR"); default: return ("UNKNOWN"); } } static void g_raid_md_intel_print(struct intel_raid_conf *meta) { struct intel_raid_vol *mvol; struct intel_raid_map *mmap; int i, j, k; if (g_raid_debug < 1) return; printf("********* ATA Intel MatrixRAID Metadata *********\n"); printf("intel_id <%.24s>\n", meta->intel_id); printf("version <%.6s>\n", meta->version); printf("checksum 0x%08x\n", meta->checksum); printf("config_size 0x%08x\n", meta->config_size); printf("config_id 0x%08x\n", meta->config_id); printf("generation 0x%08x\n", meta->generation); printf("error_log_size %d\n", meta->error_log_size); printf("attributes 0x%b\n", meta->attributes, "\020" "\001RAID0" "\002RAID1" "\003RAID10" "\004RAID1E" "\005RAID15" "\006RAIDCNG" "\007EXT_STRIP" "\032NVM_CACHE" "\0332TB_DISK" "\034BBM" "\035NVM_CACHE" "\0362TB" "\037PM" "\040CHECKSUM"); 
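/*
 * The "%b" conversions in this dump use the kernel printf(9) bit-field
 * format: the leading "\020" selects hexadecimal output for the value and
 * each following bit-number/name pair in the string labels one set bit.
 */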
printf("total_disks %u\n", meta->total_disks); printf("total_volumes %u\n", meta->total_volumes); printf("error_log_pos %u\n", meta->error_log_pos); printf("cache_size %u\n", meta->cache_size); printf("orig_config_id 0x%08x\n", meta->orig_config_id); printf("pwr_cycle_count %u\n", meta->pwr_cycle_count); printf("bbm_log_size %u\n", meta->bbm_log_size); printf("Flags: S - Spare, A - Assigned, F - Failed, O - Online, D - Disabled\n"); printf("DISK# serial disk_sectors disk_sectors_hi disk_id flags owner\n"); for (i = 0; i < meta->total_disks; i++ ) { printf(" %d <%.16s> %u %u 0x%08x 0x%b %08x\n", i, meta->disk[i].serial, meta->disk[i].sectors, meta->disk[i].sectors_hi, meta->disk[i].id, meta->disk[i].flags, "\20\01S\02A\03F\04O\05D", meta->disk[i].owner_cfg_num); } for (i = 0; i < meta->total_volumes; i++) { mvol = intel_get_volume(meta, i); printf(" ****** Volume %d ******\n", i); printf(" name %.16s\n", mvol->name); printf(" total_sectors %ju\n", mvol->total_sectors); printf(" state 0x%b\n", mvol->state, "\020" "\001BOOTABLE" "\002BOOT_DEVICE" "\003READ_COALESCING" "\004WRITE_COALESCING" "\005LAST_SHUTDOWN_DIRTY" "\006HIDDEN_AT_BOOT" "\007CURRENTLY_HIDDEN" "\010VERIFY_AND_FIX" "\011MAP_STATE_UNINIT" "\012NO_AUTO_RECOVERY" "\013CLONE_N_GO" "\014CLONE_MAN_SYNC" "\015CNG_MASTER_DISK_NUM"); printf(" reserved %u\n", mvol->reserved); printf(" migr_priority %u\n", mvol->migr_priority); printf(" num_sub_vols %u\n", mvol->num_sub_vols); printf(" tid %u\n", mvol->tid); printf(" cng_master_disk %u\n", mvol->cng_master_disk); printf(" cache_policy %u\n", mvol->cache_policy); printf(" cng_state %u (%s)\n", mvol->cng_state, intel_cngst2str(mvol->cng_state)); printf(" cng_sub_state %u\n", mvol->cng_sub_state); printf(" curr_migr_unit %u\n", mvol->curr_migr_unit); printf(" curr_migr_unit_hi %u\n", mvol->curr_migr_unit_hi); printf(" checkpoint_id %u\n", mvol->checkpoint_id); printf(" migr_state %u\n", mvol->migr_state); printf(" migr_type %u (%s)\n", mvol->migr_type, intel_mt2str(mvol->migr_type)); printf(" dirty %u\n", mvol->dirty); printf(" fs_state %u\n", mvol->fs_state); printf(" verify_errors %u\n", mvol->verify_errors); printf(" bad_blocks %u\n", mvol->bad_blocks); for (j = 0; j < (mvol->migr_state ? 
2 : 1); j++) { printf(" *** Map %d ***\n", j); mmap = intel_get_map(mvol, j); printf(" offset %u\n", mmap->offset); printf(" offset_hi %u\n", mmap->offset_hi); printf(" disk_sectors %u\n", mmap->disk_sectors); printf(" disk_sectors_hi %u\n", mmap->disk_sectors_hi); printf(" stripe_count %u\n", mmap->stripe_count); printf(" stripe_count_hi %u\n", mmap->stripe_count_hi); printf(" strip_sectors %u\n", mmap->strip_sectors); printf(" status %u (%s)\n", mmap->status, intel_status2str(mmap->status)); printf(" type %u (%s)\n", mmap->type, intel_type2str(mmap->type)); printf(" total_disks %u\n", mmap->total_disks); printf(" total_domains %u\n", mmap->total_domains); printf(" failed_disk_num %u\n", mmap->failed_disk_num); printf(" ddf %u\n", mmap->ddf); printf(" disk_idx "); for (k = 0; k < mmap->total_disks; k++) printf(" 0x%08x", mmap->disk_idx[k]); printf("\n"); } } printf("=================================================\n"); } static struct intel_raid_conf * intel_meta_copy(struct intel_raid_conf *meta) { struct intel_raid_conf *nmeta; nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK); memcpy(nmeta, meta, meta->config_size); return (nmeta); } static int intel_meta_find_disk(struct intel_raid_conf *meta, char *serial) { int pos; for (pos = 0; pos < meta->total_disks; pos++) { if (strncmp(meta->disk[pos].serial, serial, INTEL_SERIAL_LEN) == 0) return (pos); } return (-1); } static struct intel_raid_conf * intel_meta_read(struct g_consumer *cp) { struct g_provider *pp; struct intel_raid_conf *meta; struct intel_raid_vol *mvol; struct intel_raid_map *mmap, *mmap1; char *buf; int error, i, j, k, left, size; uint32_t checksum, *ptr; pp = cp->provider; /* Read the anchor sector. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error); if (buf == NULL) { G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", pp->name, error); return (NULL); } meta = (struct intel_raid_conf *)buf; /* Check if this is an Intel RAID struct */ if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) { G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name); g_free(buf); return (NULL); } if (meta->config_size > 65536 || meta->config_size < sizeof(struct intel_raid_conf)) { G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d", meta->config_size); g_free(buf); return (NULL); } size = meta->config_size; meta = malloc(size, M_MD_INTEL, M_WAITOK); memcpy(meta, buf, min(size, pp->sectorsize)); g_free(buf); /* Read all the rest, if needed. */ if (meta->config_size > pp->sectorsize) { left = (meta->config_size - 1) / pp->sectorsize; buf = g_read_data(cp, pp->mediasize - pp->sectorsize * (2 + left), pp->sectorsize * left, &error); if (buf == NULL) { G_RAID_DEBUG(1, "Cannot read remaining metadata" " part from %s (error=%d).", pp->name, error); free(meta, M_MD_INTEL); return (NULL); } memcpy(((char *)meta) + pp->sectorsize, buf, pp->sectorsize * left); g_free(buf); } /* Check metadata checksum. */ for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < (meta->config_size / sizeof(uint32_t)); i++) { checksum += *ptr++; } checksum -= meta->checksum; if (checksum != meta->checksum) { G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name); free(meta, M_MD_INTEL); return (NULL); } /* Validate metadata size. 
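 * The expected size is rebuilt from the structure counts (anchor, per-disk
 * entries, per-volume records plus their map disk index arrays, and a
 * second map when a migration is recorded) and compared against
 * config_size at each step; anything larger than advertised is rejected.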
*/ size = sizeof(struct intel_raid_conf) + sizeof(struct intel_raid_disk) * (meta->total_disks - 1) + sizeof(struct intel_raid_vol) * meta->total_volumes; if (size > meta->config_size) { badsize: G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d", meta->config_size, size); free(meta, M_MD_INTEL); return (NULL); } for (i = 0; i < meta->total_volumes; i++) { mvol = intel_get_volume(meta, i); mmap = intel_get_map(mvol, 0); size += 4 * (mmap->total_disks - 1); if (size > meta->config_size) goto badsize; if (mvol->migr_state) { size += sizeof(struct intel_raid_map); if (size > meta->config_size) goto badsize; mmap = intel_get_map(mvol, 1); size += 4 * (mmap->total_disks - 1); if (size > meta->config_size) goto badsize; } } g_raid_md_intel_print(meta); if (strncmp(meta->version, INTEL_VERSION_1300, 6) > 0) { G_RAID_DEBUG(1, "Intel unsupported version: '%.6s'", meta->version); free(meta, M_MD_INTEL); return (NULL); } if (strncmp(meta->version, INTEL_VERSION_1300, 6) >= 0 && (meta->attributes & ~INTEL_ATTR_SUPPORTED) != 0) { G_RAID_DEBUG(1, "Intel unsupported attributes: 0x%08x", meta->attributes & ~INTEL_ATTR_SUPPORTED); free(meta, M_MD_INTEL); return (NULL); } /* Validate disk indexes. */ for (i = 0; i < meta->total_volumes; i++) { mvol = intel_get_volume(meta, i); for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) { mmap = intel_get_map(mvol, j); for (k = 0; k < mmap->total_disks; k++) { if ((mmap->disk_idx[k] & INTEL_DI_IDX) > meta->total_disks) { G_RAID_DEBUG(1, "Intel metadata disk" " index %d too big (>%d)", mmap->disk_idx[k] & INTEL_DI_IDX, meta->total_disks); free(meta, M_MD_INTEL); return (NULL); } } } } /* Validate migration types. */ for (i = 0; i < meta->total_volumes; i++) { mvol = intel_get_volume(meta, i); /* Deny unknown migration types. */ if (mvol->migr_state && mvol->migr_type != INTEL_MT_INIT && mvol->migr_type != INTEL_MT_REBUILD && mvol->migr_type != INTEL_MT_VERIFY && mvol->migr_type != INTEL_MT_GEN_MIGR && mvol->migr_type != INTEL_MT_REPAIR) { G_RAID_DEBUG(1, "Intel metadata has unsupported" " migration type %d", mvol->migr_type); free(meta, M_MD_INTEL); return (NULL); } /* Deny general migrations except SINGLE->RAID1. */ if (mvol->migr_state && mvol->migr_type == INTEL_MT_GEN_MIGR) { mmap = intel_get_map(mvol, 0); mmap1 = intel_get_map(mvol, 1); if (mmap1->total_disks != 1 || mmap->type != INTEL_T_RAID1 || mmap->total_disks != 2 || mmap->offset != mmap1->offset || mmap->disk_sectors != mmap1->disk_sectors || mmap->total_domains != mmap->total_disks || mmap->offset_hi != mmap1->offset_hi || mmap->disk_sectors_hi != mmap1->disk_sectors_hi || (mmap->disk_idx[0] != mmap1->disk_idx[0] && mmap->disk_idx[0] != mmap1->disk_idx[1])) { G_RAID_DEBUG(1, "Intel metadata has unsupported" " variant of general migration"); free(meta, M_MD_INTEL); return (NULL); } } } return (meta); } static int intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta) { struct g_provider *pp; char *buf; int error, i, sectors; uint32_t checksum, *ptr; pp = cp->provider; /* Recalculate checksum for case if metadata were changed. */ meta->checksum = 0; for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < (meta->config_size / sizeof(uint32_t)); i++) { checksum += *ptr++; } meta->checksum = checksum; /* Create and fill buffer. 
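 * Intel metadata grows backwards from the end of the disk: the anchor
 * block occupies the next-to-last sector and any overflow is laid out in
 * the sectors immediately before it, which is why the first sectorsize
 * bytes of the structure are copied into the last sector of the buffer.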
*/ sectors = howmany(meta->config_size, pp->sectorsize); buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO); if (sectors > 1) { memcpy(buf, ((char *)meta) + pp->sectorsize, (sectors - 1) * pp->sectorsize); } memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize); error = g_write_data(cp, pp->mediasize - pp->sectorsize * (1 + sectors), buf, pp->sectorsize * sectors); if (error != 0) { G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", pp->name, error); } free(buf, M_MD_INTEL); return (error); } static int intel_meta_erase(struct g_consumer *cp) { struct g_provider *pp; char *buf; int error; pp = cp->provider; buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO); error = g_write_data(cp, pp->mediasize - 2 * pp->sectorsize, buf, pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", pp->name, error); } free(buf, M_MD_INTEL); return (error); } static int intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d) { struct intel_raid_conf *meta; int error; /* Fill anchor and single disk. */ meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO); memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1); memcpy(&meta->version[0], INTEL_VERSION_1000, sizeof(INTEL_VERSION_1000) - 1); meta->config_size = INTEL_MAX_MD_SIZE(1); meta->config_id = meta->orig_config_id = arc4random(); meta->generation = 1; meta->total_disks = 1; meta->disk[0] = *d; error = intel_meta_write(cp, meta); free(meta, M_MD_INTEL); return (error); } static struct g_raid_disk * g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id) { struct g_raid_disk *disk; struct g_raid_md_intel_perdisk *pd; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; if (pd->pd_disk_pos == id) break; } return (disk); } static int g_raid_md_intel_supported(int level, int qual, int disks, int force) { switch (level) { case G_RAID_VOLUME_RL_RAID0: if (disks < 1) return (0); if (!force && (disks < 2 || disks > 6)) return (0); break; case G_RAID_VOLUME_RL_RAID1: if (disks < 1) return (0); if (!force && (disks != 2)) return (0); break; case G_RAID_VOLUME_RL_RAID1E: if (disks < 2) return (0); if (!force && (disks != 4)) return (0); break; case G_RAID_VOLUME_RL_RAID5: if (disks < 3) return (0); if (!force && disks > 6) return (0); if (qual != G_RAID_VOLUME_RLQ_R5LA) return (0); break; default: return (0); } if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE) return (0); return (1); } static struct g_raid_volume * g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id) { struct g_raid_volume *mvol; struct g_raid_md_intel_pervolume *pv; TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) { pv = mvol->v_md_data; if (pv->pv_volume_pos == id) break; } return (mvol); } static int g_raid_md_intel_start_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *tmpsd; struct g_raid_disk *olddisk, *tmpdisk; struct g_raid_md_object *md; struct g_raid_md_intel_object *mdi; struct g_raid_md_intel_pervolume *pv; struct g_raid_md_intel_perdisk *pd, *oldpd; struct intel_raid_conf *meta; struct intel_raid_vol *mvol; struct intel_raid_map *mmap0, *mmap1; int disk_pos, resurrection = 0, migr_global, i; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_intel_object *)md; meta = mdi->mdio_meta; pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; olddisk = NULL; /* Find disk position in metadata by its serial. 
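 * Intel metadata identifies members by their serial number rather than by
 * position, so a disk that was moved to another port can presumably still
 * be recognized; a serial not present in the metadata is treated as new,
 * spare or stale by the code below.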
*/ disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial); if (disk_pos < 0) { G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk"); /* Failed stale disk is useless for us. */ if ((pd->pd_disk_meta.flags & INTEL_F_FAILED) && !(pd->pd_disk_meta.flags & INTEL_F_DISABLED)) { g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED); return (0); } /* If we are in the start process, that's all for now. */ if (!mdi->mdio_started) goto nofit; /* * If we have already started - try to get use of the disk. * Try to replace OFFLINE disks first, then FAILED. */ TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) { if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE && tmpdisk->d_state != G_RAID_DISK_S_FAILED) continue; /* Make sure this disk is big enough. */ TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) { off_t disk_sectors = intel_get_disk_sectors(&pd->pd_disk_meta); if (sd->sd_offset + sd->sd_size + 4096 > disk_sectors * 512) { G_RAID_DEBUG1(1, sc, "Disk too small (%llu < %llu)", (unsigned long long) disk_sectors * 512, (unsigned long long) sd->sd_offset + sd->sd_size + 4096); break; } } if (sd != NULL) continue; if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) { olddisk = tmpdisk; break; } else if (olddisk == NULL) olddisk = tmpdisk; } if (olddisk == NULL) { nofit: if (pd->pd_disk_meta.flags & INTEL_F_SPARE) { g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); return (1); } else { g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); return (0); } } oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data; disk_pos = oldpd->pd_disk_pos; resurrection = 1; } if (olddisk == NULL) { /* Find placeholder by position. */ olddisk = g_raid_md_intel_get_disk(sc, disk_pos); if (olddisk == NULL) panic("No disk at position %d!", disk_pos); if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) { G_RAID_DEBUG1(1, sc, "More than one disk for pos %d", disk_pos); g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); return (0); } oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data; } /* Replace failed disk or placeholder with new disk. */ TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) { TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next); TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); sd->sd_disk = disk; } oldpd->pd_disk_pos = -2; pd->pd_disk_pos = disk_pos; /* If it was placeholder -- destroy it. */ if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) { g_raid_destroy_disk(olddisk); } else { /* Otherwise, make it STALE_FAILED. */ g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED); /* Update global metadata just in case. */ memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta, sizeof(struct intel_raid_disk)); } /* Welcome the new disk. 
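 * The disk state follows the metadata flags (DISABLED, FAILED, SPARE, else
 * ACTIVE), and each of its subdisks is then placed according to the
 * volume's migration state: NEW for freshly inserted members, REBUILD or
 * RESYNC with a checkpoint for in-progress migrations, STALE after an
 * unclean shutdown, ACTIVE otherwise.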
*/ if ((meta->disk[disk_pos].flags & INTEL_F_DISABLED) && !(pd->pd_disk_meta.flags & INTEL_F_SPARE)) g_raid_change_disk_state(disk, G_RAID_DISK_S_DISABLED); else if (resurrection) g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); else if (meta->disk[disk_pos].flags & INTEL_F_SPARE) g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); else g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { pv = sd->sd_volume->v_md_data; mvol = intel_get_volume(meta, pv->pv_volume_pos); mmap0 = intel_get_map(mvol, 0); if (mvol->migr_state) mmap1 = intel_get_map(mvol, 1); else mmap1 = mmap0; migr_global = 1; for (i = 0; i < mmap0->total_disks; i++) { if ((mmap0->disk_idx[i] & INTEL_DI_RBLD) == 0 && (mmap1->disk_idx[i] & INTEL_DI_RBLD) != 0) migr_global = 0; } if ((meta->disk[disk_pos].flags & INTEL_F_DISABLED) && !(pd->pd_disk_meta.flags & INTEL_F_SPARE)) { /* Disabled disk, useless. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); } else if (resurrection) { /* Stale disk, almost same as new. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) { /* Failed disk, almost useless. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); } else if (mvol->migr_state == 0) { if (mmap0->status == INTEL_S_UNINITIALIZED && (!pv->pv_cng || pv->pv_cng_master_disk != disk_pos)) { /* Freshly created uninitialized volume. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_UNINITIALIZED); } else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { /* Freshly inserted disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (mvol->dirty && (!pv->pv_cng || pv->pv_cng_master_disk != disk_pos)) { /* Dirty volume (unclean shutdown). */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_STALE); } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } } else if (mvol->migr_type == INTEL_MT_INIT || mvol->migr_type == INTEL_MT_REBUILD) { if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { /* Freshly inserted disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { /* Rebuilding disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_REBUILD); if (mvol->dirty) { sd->sd_rebuild_pos = 0; } else { sd->sd_rebuild_pos = intel_get_vol_curr_migr_unit(mvol) * sd->sd_volume->v_strip_size * mmap0->total_domains; } } else if (mvol->migr_type == INTEL_MT_INIT && migr_global) { /* Freshly created uninitialized volume. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_UNINITIALIZED); } else if (mvol->dirty && (!pv->pv_cng || pv->pv_cng_master_disk != disk_pos)) { /* Dirty volume (unclean shutdown). */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_STALE); } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } } else if (mvol->migr_type == INTEL_MT_VERIFY || mvol->migr_type == INTEL_MT_REPAIR) { if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { /* Freshly inserted disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if ((mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) || migr_global) { /* Resyncing disk. 
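 * The resume position is reconstructed from the migration checkpoint
 * (curr_migr_unit scaled by strip size and domain count); a dirty volume
 * drops the checkpoint and restarts the pass from the beginning.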
*/ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_RESYNC); if (mvol->dirty) { sd->sd_rebuild_pos = 0; } else { sd->sd_rebuild_pos = intel_get_vol_curr_migr_unit(mvol) * sd->sd_volume->v_strip_size * mmap0->total_domains; } } else if (mvol->dirty) { /* Dirty volume (unclean shutdown). */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_STALE); } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } } else if (mvol->migr_type == INTEL_MT_GEN_MIGR) { if ((mmap1->disk_idx[0] & INTEL_DI_IDX) != disk_pos) { /* Freshly inserted disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } } g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } /* Update status of our need for spare. */ if (mdi->mdio_started) { mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) + g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED) < meta->total_disks); } return (resurrection); } static void g_disk_md_intel_retaste(void *arg, int pending) { G_RAID_DEBUG(1, "Array is not complete, trying to retaste."); g_retaste(&g_raid_class); free(arg, M_MD_INTEL); } static void g_raid_md_intel_refill(struct g_raid_softc *sc) { struct g_raid_md_object *md; struct g_raid_md_intel_object *mdi; struct intel_raid_conf *meta; struct g_raid_disk *disk; struct task *task; int update, na; md = sc->sc_md; mdi = (struct g_raid_md_intel_object *)md; meta = mdi->mdio_meta; update = 0; do { /* Make sure we miss anything. */ na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) + g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED); if (na == meta->total_disks) break; G_RAID_DEBUG1(1, md->mdo_softc, "Array is not complete (%d of %d), " "trying to refill.", na, meta->total_disks); /* Try to get use some of STALE disks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_STALE) { update += g_raid_md_intel_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_ACTIVE || disk->d_state == G_RAID_DISK_S_DISABLED) break; } } if (disk != NULL) continue; /* Try to get use some of SPARE disks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_SPARE) { update += g_raid_md_intel_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_ACTIVE) break; } } } while (disk != NULL); /* Write new metadata if we changed something. */ if (update) { g_raid_md_write_intel(md, NULL, NULL, NULL); meta = mdi->mdio_meta; } /* Update status of our need for spare. */ mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) + g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED) < meta->total_disks); /* Request retaste hoping to find spare. */ if (mdi->mdio_incomplete) { task = malloc(sizeof(struct task), M_MD_INTEL, M_WAITOK | M_ZERO); TASK_INIT(task, 0, g_disk_md_intel_retaste, task); taskqueue_enqueue(taskqueue_swi, task); } } static void g_raid_md_intel_start(struct g_raid_softc *sc) { struct g_raid_md_object *md; struct g_raid_md_intel_object *mdi; struct g_raid_md_intel_pervolume *pv; struct g_raid_md_intel_perdisk *pd; struct intel_raid_conf *meta; struct intel_raid_vol *mvol; struct intel_raid_map *mmap; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; int i, j, disk_pos; md = sc->sc_md; mdi = (struct g_raid_md_intel_object *)md; meta = mdi->mdio_meta; /* Create volumes and subdisks. 
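When the refill pass above still leaves the array incomplete, the driver does not call g_retaste() inline; it queues a one-shot task on the software-interrupt taskqueue and lets the handler free its own argument. A minimal sketch of that pattern, assuming kernel context and using the generic M_TEMP malloc type in place of the driver's private one:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/taskqueue.h>

static void
deferred_work(void *arg, int pending)
{
	/* ... perform the deferred work (the driver calls g_retaste()) ... */
	free(arg, M_TEMP);			/* the handler owns its argument */
}

static void
schedule_deferred_work(void)
{
	struct task *task;

	task = malloc(sizeof(*task), M_TEMP, M_WAITOK | M_ZERO);
	TASK_INIT(task, 0, deferred_work, task);	/* pass the task as its own argument */
	taskqueue_enqueue(taskqueue_swi, task);
}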
*/ for (i = 0; i < meta->total_volumes; i++) { mvol = intel_get_volume(meta, i); mmap = intel_get_map(mvol, 0); vol = g_raid_create_volume(sc, mvol->name, mvol->tid - 1); pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO); pv->pv_volume_pos = i; pv->pv_cng = (mvol->state & INTEL_ST_CLONE_N_GO) != 0; pv->pv_cng_man_sync = (mvol->state & INTEL_ST_CLONE_MAN_SYNC) != 0; if (mvol->cng_master_disk < mmap->total_disks) pv->pv_cng_master_disk = mvol->cng_master_disk; vol->v_md_data = pv; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; if (mmap->type == INTEL_T_RAID0) vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; else if (mmap->type == INTEL_T_RAID1 && mmap->total_domains >= 2 && mmap->total_domains <= mmap->total_disks) { /* Assume total_domains is correct. */ if (mmap->total_domains == mmap->total_disks) vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; else vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; } else if (mmap->type == INTEL_T_RAID1) { /* total_domains looks wrong. */ if (mmap->total_disks <= 2) vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; else vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; } else if (mmap->type == INTEL_T_RAID5) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA; } else vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ vol->v_disks_count = mmap->total_disks; vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ vol->v_sectorsize = 512; //ZZZ for (j = 0; j < vol->v_disks_count; j++) { sd = &vol->v_subdisks[j]; sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ } g_raid_start_volume(vol); } /* Create disk placeholders to store data for later writing. */ for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) { pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); pd->pd_disk_pos = disk_pos; pd->pd_disk_meta = meta->disk[disk_pos]; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_state = G_RAID_DISK_S_OFFLINE; for (i = 0; i < meta->total_volumes; i++) { mvol = intel_get_volume(meta, i); mmap = intel_get_map(mvol, 0); for (j = 0; j < mmap->total_disks; j++) { if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos) break; } if (j == mmap->total_disks) continue; vol = g_raid_md_intel_get_volume(sc, i); sd = &vol->v_subdisks[j]; sd->sd_disk = disk; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); } } /* Make all disks found till the moment take their places. */ do { TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_NONE) { g_raid_md_intel_start_disk(disk); break; } } } while (disk != NULL); mdi->mdio_started = 1; G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_intel(md, NULL, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. 
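The level mapping above is essentially a small decision table: Intel's RAID1 map type covers both plain mirrors and RAID1E, with total_domains (the number of mirror copies) deciding which, and a guess from the disk count when total_domains looks inconsistent. A condensed standalone restatement under placeholder names; the real INTEL_T_* on-disk encodings are defined in the metadata header, not here.

enum map_type { T_RAID0, T_RAID1, T_RAID5, T_OTHER };	/* placeholders for INTEL_T_* */
enum level { L_RAID0, L_RAID1, L_RAID1E, L_RAID5, L_UNKNOWN };

static enum level
map_to_level(enum map_type type, int total_domains, int total_disks)
{
	if (type == T_RAID0)
		return (L_RAID0);
	if (type == T_RAID1) {
		if (total_domains >= 2 && total_domains <= total_disks)
			return (total_domains == total_disks ?
			    L_RAID1 : L_RAID1E);
		/* total_domains looks wrong; guess from the disk count. */
		return (total_disks <= 2 ? L_RAID1 : L_RAID1E);
	}
	if (type == T_RAID5)		/* left-asymmetric layout (RLQ_R5LA) */
		return (L_RAID5);
	return (L_UNKNOWN);
}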
*/ g_raid_md_intel_refill(sc); TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); } callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } static void g_raid_md_intel_new_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_intel_object *mdi; struct intel_raid_conf *pdmeta; struct g_raid_md_intel_perdisk *pd; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_intel_object *)md; pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; pdmeta = pd->pd_meta; if (mdi->mdio_started) { if (g_raid_md_intel_start_disk(disk)) g_raid_md_write_intel(md, NULL, NULL, NULL); } else { /* If we haven't started yet - check metadata freshness. */ if (mdi->mdio_meta == NULL || ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) { G_RAID_DEBUG1(1, sc, "Newer disk"); if (mdi->mdio_meta != NULL) free(mdi->mdio_meta, M_MD_INTEL); mdi->mdio_meta = intel_meta_copy(pdmeta); mdi->mdio_generation = mdi->mdio_meta->generation; mdi->mdio_disks_present = 1; } else if (pdmeta->generation == mdi->mdio_generation) { mdi->mdio_disks_present++; G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", mdi->mdio_disks_present, mdi->mdio_meta->total_disks); } else { G_RAID_DEBUG1(1, sc, "Older disk"); } /* If we collected all needed disks - start array. */ if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks) g_raid_md_intel_start(sc); } } static void g_raid_intel_go(void *arg) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_intel_object *mdi; sc = arg; md = sc->sc_md; mdi = (struct g_raid_md_intel_object *)md; if (!mdi->mdio_started) { G_RAID_DEBUG1(0, sc, "Force array start due to timeout."); g_raid_event_send(sc, G_RAID_NODE_E_START, 0); } } static int g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp, struct g_geom **gp) { struct g_raid_softc *sc; struct g_raid_md_intel_object *mdi; char name[16]; mdi = (struct g_raid_md_intel_object *)md; mdi->mdio_config_id = mdi->mdio_orig_config_id = arc4random(); mdi->mdio_generation = 0; snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id); sc = g_raid_create_node(mp, name, md); if (sc == NULL) return (G_RAID_MD_TASTE_FAIL); md->mdo_softc = sc; *gp = sc->sc_geom; return (G_RAID_MD_TASTE_NEW); } /* * Return the last N characters of the serial label. The Linux and * ataraid(7) code always uses the last 16 characters of the label to * store into the Intel meta format. Generalize this to N characters * since that's easy. Labels can be up to 20 characters for SATA drives * and up 251 characters for SAS drives. Since intel controllers don't * support SAS drives, just stick with the SATA limits for stack friendliness. 
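A userland-style illustration of the truncation described above, using a made-up 20-character SATA-style label and the 16-character field used by the Intel metadata. Only the tail of the label is kept; shorter labels are copied whole. Like the kernel routine below, strncpy() does not NUL-terminate a tail that exactly fills the field, which is acceptable for a fixed-width metadata field.

#include <stdio.h>
#include <string.h>

static void
serial_tail(const char *label, char *out, size_t outlen)
{
	size_t len = strlen(label);

	if (len > outlen)
		len -= outlen;		/* index where the kept tail starts */
	else
		len = 0;		/* short label: keep it all */
	strncpy(out, label + len, outlen);
}

int
main(void)
{
	char out[16 + 1] = { 0 };	/* +1 so the example can print it */

	serial_tail("WD-WCC4N1234567890AB", out, 16);	/* made-up 20-char label */
	printf("%s\n", out);		/* prints "CC4N1234567890AB" */
	return (0);
}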
*/ static int g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen) { char serial_buffer[DISK_IDENT_SIZE]; int len, error; len = sizeof(serial_buffer); error = g_io_getattr("GEOM::ident", cp, &len, serial_buffer); if (error != 0) return (error); len = strlen(serial_buffer); if (len > serlen) len -= serlen; else len = 0; strncpy(serial, serial_buffer + len, serlen); return (0); } static int g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp, struct g_consumer *cp, struct g_geom **gp) { struct g_consumer *rcp; struct g_provider *pp; struct g_raid_md_intel_object *mdi, *mdi1; struct g_raid_softc *sc; struct g_raid_disk *disk; struct intel_raid_conf *meta; struct g_raid_md_intel_perdisk *pd; struct g_geom *geom; int error, disk_pos, result, spare, len; char serial[INTEL_SERIAL_LEN]; char name[16]; uint16_t vendor; G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name); mdi = (struct g_raid_md_intel_object *)md; pp = cp->provider; /* Read metadata from device. */ meta = NULL; disk_pos = 0; g_topology_unlock(); error = g_raid_md_get_label(cp, serial, sizeof(serial)); if (error != 0) { G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).", pp->name, error); goto fail2; } vendor = 0xffff; len = sizeof(vendor); if (pp->geom->rank == 1) g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); meta = intel_meta_read(cp); g_topology_lock(); if (meta == NULL) { if (g_raid_aggressive_spare) { if (vendor != 0x8086) { G_RAID_DEBUG(1, "Intel vendor mismatch 0x%04x != 0x8086", vendor); } else { G_RAID_DEBUG(1, "No Intel metadata, forcing spare."); spare = 2; goto search; } } return (G_RAID_MD_TASTE_FAIL); } /* Check this disk position in obtained metadata. */ disk_pos = intel_meta_find_disk(meta, serial); if (disk_pos < 0) { G_RAID_DEBUG(1, "Intel serial '%s' not found", serial); goto fail1; } if (intel_get_disk_sectors(&meta->disk[disk_pos]) != (pp->mediasize / pp->sectorsize)) { G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju", intel_get_disk_sectors(&meta->disk[disk_pos]), (off_t)(pp->mediasize / pp->sectorsize)); goto fail1; } G_RAID_DEBUG(1, "Intel disk position %d", disk_pos); spare = meta->disk[disk_pos].flags & INTEL_F_SPARE; search: /* Search for matching node. */ sc = NULL; mdi1 = NULL; LIST_FOREACH(geom, &mp->geom, geom) { sc = geom->softc; if (sc == NULL) continue; if (sc->sc_stopping != 0) continue; if (sc->sc_md->mdo_class != md->mdo_class) continue; mdi1 = (struct g_raid_md_intel_object *)sc->sc_md; if (spare) { if (mdi1->mdio_incomplete) break; } else { if (mdi1->mdio_config_id == meta->config_id) break; } } /* Found matching node. */ if (geom != NULL) { G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); result = G_RAID_MD_TASTE_EXISTING; } else if (spare) { /* Not found needy node -- left for later. */ G_RAID_DEBUG(1, "Spare is not needed at this time"); goto fail1; } else { /* Not found matching node -- create one. */ result = G_RAID_MD_TASTE_NEW; mdi->mdio_config_id = meta->config_id; mdi->mdio_orig_config_id = meta->orig_config_id; snprintf(name, sizeof(name), "Intel-%08x", meta->config_id); sc = g_raid_create_node(mp, name, md); md->mdo_softc = sc; geom = sc->sc_geom; callout_init(&mdi->mdio_start_co, 1); callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz, g_raid_intel_go, sc); mdi->mdio_rootmount = root_mount_hold("GRAID-Intel"); G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount); } /* There is no return after this point, so we close passed consumer. 
*/ g_access(cp, -1, 0, 0); rcp = g_new_consumer(geom); rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; g_topology_unlock(); sx_xlock(&sc->sc_lock); pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); pd->pd_meta = meta; pd->pd_disk_pos = -1; if (spare == 2) { memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN); intel_set_disk_sectors(&pd->pd_disk_meta, pp->mediasize / pp->sectorsize); pd->pd_disk_meta.id = 0; pd->pd_disk_meta.flags = INTEL_F_SPARE; } else { pd->pd_disk_meta = meta->disk[disk_pos]; } disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = rcp; rcp->private = disk; g_raid_get_disk_info(disk); g_raid_md_intel_new_disk(disk); sx_xunlock(&sc->sc_lock); g_topology_lock(); *gp = geom; return (result); fail2: g_topology_lock(); fail1: free(meta, M_MD_INTEL); return (G_RAID_MD_TASTE_FAIL); } static int g_raid_md_event_intel(struct g_raid_md_object *md, struct g_raid_disk *disk, u_int event) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; struct g_raid_md_intel_object *mdi; struct g_raid_md_intel_perdisk *pd; sc = md->mdo_softc; mdi = (struct g_raid_md_intel_object *)md; if (disk == NULL) { switch (event) { case G_RAID_NODE_E_START: if (!mdi->mdio_started) g_raid_md_intel_start(sc); return (0); } return (-1); } pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; switch (event) { case G_RAID_DISK_E_DISCONNECTED: /* If disk was assigned, just update statuses. */ if (pd->pd_disk_pos >= 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); if (disk->d_consumer) { g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; } TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); } } else { /* Otherwise -- delete. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); } /* Write updated metadata to all disks. */ g_raid_md_write_intel(md, NULL, NULL, NULL); /* Check if anything left except placeholders. 
*/ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_intel_refill(sc); return (0); } return (-2); } static int g_raid_md_ctl_intel(struct g_raid_md_object *md, struct gctl_req *req) { struct g_raid_softc *sc; struct g_raid_volume *vol, *vol1; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_intel_object *mdi; struct g_raid_md_intel_pervolume *pv; struct g_raid_md_intel_perdisk *pd; struct g_consumer *cp; struct g_provider *pp; char arg[16], serial[INTEL_SERIAL_LEN]; const char *nodename, *verb, *volname, *levelname, *diskname; char *tmp; int *nargs, *force; off_t off, size, sectorsize, strip, disk_sectors; intmax_t *sizearg, *striparg; int numdisks, i, len, level, qual, update; int error; sc = md->mdo_softc; mdi = (struct g_raid_md_intel_object *)md; verb = gctl_get_param(req, "verb", NULL); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); error = 0; if (strcmp(verb, "label") == 0) { if (*nargs < 4) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, "arg1"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } levelname = gctl_get_asciiparam(req, "arg2"); if (levelname == NULL) { gctl_error(req, "No RAID level."); return (-3); } if (strcasecmp(levelname, "RAID5") == 0) levelname = "RAID5-LA"; if (g_raid_volume_str2level(levelname, &level, &qual)) { gctl_error(req, "Unknown RAID level '%s'.", levelname); return (-4); } numdisks = *nargs - 3; force = gctl_get_paraml(req, "force", sizeof(*force)); if (!g_raid_md_intel_supported(level, qual, numdisks, force ? *force : 0)) { gctl_error(req, "Unsupported RAID level " "(0x%02x/0x%02x), or number of disks (%d).", level, qual, numdisks); return (-5); } /* Search for disks, connect them and probe. */ size = 0x7fffffffffffffffllu; sectorsize = 0; for (i = 0; i < numdisks; i++) { snprintf(arg, sizeof(arg), "arg%d", i + 3); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -6; break; } if (strcmp(diskname, "NONE") == 0) { cp = NULL; pp = NULL; } else { g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -7; break; } pp = cp->provider; } pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); pd->pd_disk_pos = i; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = cp; if (cp == NULL) { strcpy(&pd->pd_disk_meta.serial[0], "NONE"); pd->pd_disk_meta.id = 0xffffffff; pd->pd_disk_meta.flags = INTEL_F_ASSIGNED; continue; } cp->private = disk; g_topology_unlock(); error = g_raid_md_get_label(cp, &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN); if (error != 0) { gctl_error(req, "Can't get serial for provider '%s'.", diskname); error = -8; break; } g_raid_get_disk_info(disk); intel_set_disk_sectors(&pd->pd_disk_meta, pp->mediasize / pp->sectorsize); if (size > pp->mediasize) size = pp->mediasize; if (sectorsize < pp->sectorsize) sectorsize = pp->sectorsize; pd->pd_disk_meta.id = 0; pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE; } if (error != 0) return (error); if (sectorsize <= 0) { gctl_error(req, "Can't get sector size."); return (-8); } /* Reserve some space for metadata. */ size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize; /* Handle size argument. 
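The reservation above rounds the 4096-byte metadata area up to a whole number of sectors before subtracting it from the usable size. A small worked sketch; the helper name is illustrative:

#include <stdint.h>

/* Round the 4 KiB metadata reserve up to a whole number of sectors. */
static uint64_t
metadata_reserve(uint64_t sectorsize)
{
	return (((4096 + sectorsize - 1) / sectorsize) * sectorsize);
}
/*
 * metadata_reserve(512)  == 4096   (eight 512-byte sectors)
 * metadata_reserve(4096) == 4096   (one 4 KiB sector)
 * metadata_reserve(520)  == 4160   (eight 520-byte sectors)
 */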
*/ len = sizeof(*sizearg); sizearg = gctl_get_param(req, "size", &len); if (sizearg != NULL && len == sizeof(*sizearg) && *sizearg > 0) { if (*sizearg > size) { gctl_error(req, "Size too big %lld > %lld.", (long long)*sizearg, (long long)size); return (-9); } size = *sizearg; } /* Handle strip argument. */ strip = 131072; len = sizeof(*striparg); striparg = gctl_get_param(req, "strip", &len); if (striparg != NULL && len == sizeof(*striparg) && *striparg > 0) { if (*striparg < sectorsize) { gctl_error(req, "Strip size too small."); return (-10); } if (*striparg % sectorsize != 0) { gctl_error(req, "Incorrect strip size."); return (-11); } if (strip > 65535 * sectorsize) { gctl_error(req, "Strip size too big."); return (-12); } strip = *striparg; } /* Round size down to strip or sector. */ if (level == G_RAID_VOLUME_RL_RAID1) size -= (size % sectorsize); else if (level == G_RAID_VOLUME_RL_RAID1E && (numdisks & 1) != 0) size -= (size % (2 * strip)); else size -= (size % strip); if (size <= 0) { gctl_error(req, "Size too small."); return (-13); } /* We have all we need, create things: volume, ... */ mdi->mdio_started = 1; vol = g_raid_create_volume(sc, volname, -1); pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO); pv->pv_volume_pos = 0; vol->v_md_data = pv; vol->v_raid_level = level; vol->v_raid_level_qualifier = qual; vol->v_strip_size = strip; vol->v_disks_count = numdisks; if (level == G_RAID_VOLUME_RL_RAID0) vol->v_mediasize = size * numdisks; else if (level == G_RAID_VOLUME_RL_RAID1) vol->v_mediasize = size; else if (level == G_RAID_VOLUME_RL_RAID5) vol->v_mediasize = size * (numdisks - 1); else { /* RAID1E */ vol->v_mediasize = ((size * numdisks) / strip / 2) * strip; } vol->v_sectorsize = sectorsize; g_raid_start_volume(vol); /* , and subdisks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; sd = &vol->v_subdisks[pd->pd_disk_pos]; sd->sd_disk = disk; sd->sd_offset = 0; sd->sd_size = size; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); if (sd->sd_disk->d_consumer != NULL) { g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); if (level == G_RAID_VOLUME_RL_RAID5) g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_UNINITIALIZED); else g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } else { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); } } /* Write metadata based on created entities. */ G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_intel(md, NULL, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_intel_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); return (0); } if (strcmp(verb, "add") == 0) { if (*nargs != 3) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, "arg1"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } levelname = gctl_get_asciiparam(req, "arg2"); if (levelname == NULL) { gctl_error(req, "No RAID level."); return (-3); } if (strcasecmp(levelname, "RAID5") == 0) levelname = "RAID5-LA"; if (g_raid_volume_str2level(levelname, &level, &qual)) { gctl_error(req, "Unknown RAID level '%s'.", levelname); return (-4); } /* Look for existing volumes. 
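After the size checks, the per-disk size is trimmed down to a multiple of the strip (or of two strips for RAID1E with an odd number of disks), and the RAID1E volume size is then roughly half of the raw space, rounded down to a strip. A standalone restatement of that last formula with worked numbers:

#include <stdint.h>

/*
 * Usable RAID1E size for numdisks members of per-disk size "size"
 * (already rounded down to the strip), mirroring the formula above.
 */
static uint64_t
raid1e_mediasize(uint64_t size, uint64_t numdisks, uint64_t strip)
{
	return (((size * numdisks) / strip / 2) * strip);
}
/*
 * Example: three 1000 MiB members with a 128 KiB strip give
 * raid1e_mediasize(1048576000, 3, 131072) == 1572864000 bytes (1500 MiB):
 * every block is stored twice, so about half of the raw space is usable.
 */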
*/ i = 0; vol1 = NULL; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { vol1 = vol; i++; } if (i > 1) { gctl_error(req, "Maximum two volumes supported."); return (-6); } if (vol1 == NULL) { gctl_error(req, "At least one volume must exist."); return (-7); } numdisks = vol1->v_disks_count; force = gctl_get_paraml(req, "force", sizeof(*force)); if (!g_raid_md_intel_supported(level, qual, numdisks, force ? *force : 0)) { gctl_error(req, "Unsupported RAID level " "(0x%02x/0x%02x), or number of disks (%d).", level, qual, numdisks); return (-5); } /* Collect info about present disks. */ size = 0x7fffffffffffffffllu; sectorsize = 512; for (i = 0; i < numdisks; i++) { disk = vol1->v_subdisks[i].sd_disk; pd = (struct g_raid_md_intel_perdisk *) disk->d_md_data; disk_sectors = intel_get_disk_sectors(&pd->pd_disk_meta); if (disk_sectors * 512 < size) size = disk_sectors * 512; if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && disk->d_consumer->provider->sectorsize > sectorsize) { sectorsize = disk->d_consumer->provider->sectorsize; } } /* Reserve some space for metadata. */ size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize; /* Decide insert before or after. */ sd = &vol1->v_subdisks[0]; if (sd->sd_offset > size - (sd->sd_offset + sd->sd_size)) { off = 0; size = sd->sd_offset; } else { off = sd->sd_offset + sd->sd_size; size = size - (sd->sd_offset + sd->sd_size); } /* Handle strip argument. */ strip = 131072; len = sizeof(*striparg); striparg = gctl_get_param(req, "strip", &len); if (striparg != NULL && len == sizeof(*striparg) && *striparg > 0) { if (*striparg < sectorsize) { gctl_error(req, "Strip size too small."); return (-10); } if (*striparg % sectorsize != 0) { gctl_error(req, "Incorrect strip size."); return (-11); } if (strip > 65535 * sectorsize) { gctl_error(req, "Strip size too big."); return (-12); } strip = *striparg; } /* Round offset up to strip. */ if (off % strip != 0) { size -= strip - off % strip; off += strip - off % strip; } /* Handle size argument. */ len = sizeof(*sizearg); sizearg = gctl_get_param(req, "size", &len); if (sizearg != NULL && len == sizeof(*sizearg) && *sizearg > 0) { if (*sizearg > size) { gctl_error(req, "Size too big %lld > %lld.", (long long)*sizearg, (long long)size); return (-9); } size = *sizearg; } /* Round size down to strip or sector. */ if (level == G_RAID_VOLUME_RL_RAID1) size -= (size % sectorsize); else size -= (size % strip); if (size <= 0) { gctl_error(req, "Size too small."); return (-13); } if (size > 0xffffffffllu * sectorsize) { gctl_error(req, "Size too big."); return (-14); } /* We have all we need, create things: volume, ... */ vol = g_raid_create_volume(sc, volname, -1); pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO); pv->pv_volume_pos = i; vol->v_md_data = pv; vol->v_raid_level = level; vol->v_raid_level_qualifier = qual; vol->v_strip_size = strip; vol->v_disks_count = numdisks; if (level == G_RAID_VOLUME_RL_RAID0) vol->v_mediasize = size * numdisks; else if (level == G_RAID_VOLUME_RL_RAID1) vol->v_mediasize = size; else if (level == G_RAID_VOLUME_RL_RAID5) vol->v_mediasize = size * (numdisks - 1); else { /* RAID1E */ vol->v_mediasize = ((size * numdisks) / strip / 2) * strip; } vol->v_sectorsize = sectorsize; g_raid_start_volume(vol); /* , and subdisks. 
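Intel metadata supports at most two volumes per disk set, so the "add" verb above only has to choose between the free region in front of the existing volume and the one behind it, taking whichever is larger. A standalone restatement under illustrative names:

#include <stdint.h>

struct gap { uint64_t off, len; };

/* Pick the larger of the free regions before and after the existing volume. */
static struct gap
pick_gap(uint64_t disk_size, uint64_t vol_off, uint64_t vol_size)
{
	struct gap g;
	uint64_t after = disk_size - (vol_off + vol_size);

	if (vol_off > after) {
		g.off = 0;			/* place the new volume in front */
		g.len = vol_off;
	} else {
		g.off = vol_off + vol_size;	/* place it behind */
		g.len = after;
	}
	return (g);
}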
*/ for (i = 0; i < numdisks; i++) { disk = vol1->v_subdisks[i].sd_disk; sd = &vol->v_subdisks[i]; sd->sd_disk = disk; sd->sd_offset = off; sd->sd_size = size; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); if (disk->d_state == G_RAID_DISK_S_ACTIVE) { if (level == G_RAID_VOLUME_RL_RAID5) g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_UNINITIALIZED); else g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } } /* Write metadata based on created entities. */ g_raid_md_write_intel(md, NULL, NULL, NULL); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); return (0); } if (strcmp(verb, "delete") == 0) { nodename = gctl_get_asciiparam(req, "arg0"); if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0) nodename = NULL; /* Full node destruction. */ if (*nargs == 1 && nodename != NULL) { /* Check if some volume is still open. */ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && g_raid_nopens(sc) != 0) { gctl_error(req, "Some volume is still open."); return (-4); } TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) intel_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); return (0); } /* Destroy specified volume. If it was last - all node. */ if (*nargs > 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, nodename != NULL ? "arg1" : "arg0"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } /* Search for volume. */ TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (strcmp(vol->v_name, volname) == 0) break; pp = vol->v_provider; if (pp == NULL) continue; if (strcmp(pp->name, volname) == 0) break; if (strncmp(pp->name, "raid/", 5) == 0 && strcmp(pp->name + 5, volname) == 0) break; } if (vol == NULL) { i = strtol(volname, &tmp, 10); if (verb != volname && tmp[0] == 0) { TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (vol->v_global_id == i) break; } } } if (vol == NULL) { gctl_error(req, "Volume '%s' not found.", volname); return (-3); } /* Check if volume is still open. */ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && vol->v_provider_open != 0) { gctl_error(req, "Volume is still open."); return (-4); } /* Destroy volume and potentially node. */ i = 0; TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next) i++; if (i >= 2) { g_raid_destroy_volume(vol); g_raid_md_write_intel(md, NULL, NULL, NULL); } else { TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) intel_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); } return (0); } if (strcmp(verb, "remove") == 0 || strcmp(verb, "fail") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } for (i = 1; i < *nargs; i++) { snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -2; break; } if (strncmp(diskname, "/dev/", 5) == 0) diskname += 5; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && strcmp(disk->d_consumer->provider->name, diskname) == 0) break; } if (disk == NULL) { gctl_error(req, "Disk '%s' not found.", diskname); error = -3; break; } if (strcmp(verb, "fail") == 0) { g_raid_md_fail_disk_intel(md, NULL, disk); continue; } pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; /* Erase metadata on deleting disk. 
*/ intel_meta_erase(disk->d_consumer); /* If disk was assigned, just update statuses. */ if (pd->pd_disk_pos >= 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); } } else { /* Otherwise -- delete. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); } } /* Write updated metadata to remaining disks. */ g_raid_md_write_intel(md, NULL, NULL, NULL); /* Check if anything left except placeholders. */ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_intel_refill(sc); return (error); } if (strcmp(verb, "insert") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } update = 0; for (i = 1; i < *nargs; i++) { /* Get disk name. */ snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -3; break; } /* Try to find provider with specified name. */ g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -4; break; } pp = cp->provider; g_topology_unlock(); /* Read disk serial. */ error = g_raid_md_get_label(cp, &serial[0], INTEL_SERIAL_LEN); if (error != 0) { gctl_error(req, "Can't get serial for provider '%s'.", diskname); g_raid_kill_consumer(sc, cp); error = -7; break; } pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); pd->pd_disk_pos = -1; disk = g_raid_create_disk(sc); disk->d_consumer = cp; disk->d_md_data = (void *)pd; cp->private = disk; g_raid_get_disk_info(disk); memcpy(&pd->pd_disk_meta.serial[0], &serial[0], INTEL_SERIAL_LEN); intel_set_disk_sectors(&pd->pd_disk_meta, pp->mediasize / pp->sectorsize); pd->pd_disk_meta.id = 0; pd->pd_disk_meta.flags = INTEL_F_SPARE; /* Welcome the "new" disk. */ update += g_raid_md_intel_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_SPARE) { intel_meta_write_spare(cp, &pd->pd_disk_meta); g_raid_destroy_disk(disk); } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) { gctl_error(req, "Disk '%s' doesn't fit.", diskname); g_raid_destroy_disk(disk); error = -8; break; } } /* Write new metadata if we changed something. */ if (update) g_raid_md_write_intel(md, NULL, NULL, NULL); return (error); } return (-100); } static int g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_intel_object *mdi; struct g_raid_md_intel_pervolume *pv; struct g_raid_md_intel_perdisk *pd; struct intel_raid_conf *meta; struct intel_raid_vol *mvol; struct intel_raid_map *mmap0, *mmap1; off_t sectorsize = 512, pos; const char *version, *cv; int vi, sdi, numdisks, len, state, stale; sc = md->mdo_softc; mdi = (struct g_raid_md_intel_object *)md; if (sc->sc_stopping == G_RAID_DESTROY_HARD) return (0); /* Bump generation. Newly written metadata may differ from previous. */ mdi->mdio_generation++; /* Count number of disks. 
*/ numdisks = 0; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; if (pd->pd_disk_pos < 0) continue; numdisks++; if (disk->d_state == G_RAID_DISK_S_ACTIVE) { pd->pd_disk_meta.flags = INTEL_F_ONLINE | INTEL_F_ASSIGNED; } else if (disk->d_state == G_RAID_DISK_S_FAILED) { pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED; } else if (disk->d_state == G_RAID_DISK_S_DISABLED) { pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED | INTEL_F_DISABLED; } else { if (!(pd->pd_disk_meta.flags & INTEL_F_DISABLED)) pd->pd_disk_meta.flags = INTEL_F_ASSIGNED; if (pd->pd_disk_meta.id != 0xffffffff) { pd->pd_disk_meta.id = 0xffffffff; len = strlen(pd->pd_disk_meta.serial); len = min(len, INTEL_SERIAL_LEN - 3); strcpy(pd->pd_disk_meta.serial + len, ":0"); } } } /* Fill anchor and disks. */ meta = malloc(INTEL_MAX_MD_SIZE(numdisks), M_MD_INTEL, M_WAITOK | M_ZERO); memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1); meta->config_size = INTEL_MAX_MD_SIZE(numdisks); meta->config_id = mdi->mdio_config_id; meta->orig_config_id = mdi->mdio_orig_config_id; meta->generation = mdi->mdio_generation; meta->attributes = INTEL_ATTR_CHECKSUM; meta->total_disks = numdisks; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; if (pd->pd_disk_pos < 0) continue; meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta; if (pd->pd_disk_meta.sectors_hi != 0) meta->attributes |= INTEL_ATTR_2TB_DISK; } /* Fill volumes and maps. */ vi = 0; version = INTEL_VERSION_1000; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { pv = vol->v_md_data; if (vol->v_stopping) continue; mvol = intel_get_volume(meta, vi); /* New metadata may have different volumes order. */ pv->pv_volume_pos = vi; for (sdi = 0; sdi < vol->v_disks_count; sdi++) { sd = &vol->v_subdisks[sdi]; if (sd->sd_disk != NULL) break; } if (sdi >= vol->v_disks_count) panic("No any filled subdisk in volume"); if (vol->v_mediasize >= 0x20000000000llu) meta->attributes |= INTEL_ATTR_2TB; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) meta->attributes |= INTEL_ATTR_RAID0; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) meta->attributes |= INTEL_ATTR_RAID1; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) meta->attributes |= INTEL_ATTR_RAID5; else if ((vol->v_disks_count & 1) == 0) meta->attributes |= INTEL_ATTR_RAID10; else meta->attributes |= INTEL_ATTR_RAID1E; if (pv->pv_cng) meta->attributes |= INTEL_ATTR_RAIDCNG; if (vol->v_strip_size > 131072) meta->attributes |= INTEL_ATTR_EXT_STRIP; if (pv->pv_cng) cv = INTEL_VERSION_1206; else if (vol->v_disks_count > 4) cv = INTEL_VERSION_1204; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) cv = INTEL_VERSION_1202; else if (vol->v_disks_count > 2) cv = INTEL_VERSION_1201; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) cv = INTEL_VERSION_1100; else cv = INTEL_VERSION_1000; if (strcmp(cv, version) > 0) version = cv; strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name)); mvol->total_sectors = vol->v_mediasize / sectorsize; mvol->state = (INTEL_ST_READ_COALESCING | INTEL_ST_WRITE_COALESCING); mvol->tid = vol->v_global_id + 1; if (pv->pv_cng) { mvol->state |= INTEL_ST_CLONE_N_GO; if (pv->pv_cng_man_sync) mvol->state |= INTEL_ST_CLONE_MAN_SYNC; mvol->cng_master_disk = pv->pv_cng_master_disk; if (vol->v_subdisks[pv->pv_cng_master_disk].sd_state == G_RAID_SUBDISK_S_NONE) mvol->cng_state = INTEL_CNGST_MASTER_MISSING; else if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) mvol->cng_state = INTEL_CNGST_NEEDS_UPDATE; 
else mvol->cng_state = INTEL_CNGST_UPDATED; } /* Check for any recovery in progress. */ state = G_RAID_SUBDISK_S_ACTIVE; pos = 0x7fffffffffffffffllu; stale = 0; for (sdi = 0; sdi < vol->v_disks_count; sdi++) { sd = &vol->v_subdisks[sdi]; if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) state = G_RAID_SUBDISK_S_REBUILD; else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC && state != G_RAID_SUBDISK_S_REBUILD) state = G_RAID_SUBDISK_S_RESYNC; else if (sd->sd_state == G_RAID_SUBDISK_S_STALE) stale = 1; if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) && sd->sd_rebuild_pos < pos) pos = sd->sd_rebuild_pos; } if (state == G_RAID_SUBDISK_S_REBUILD) { mvol->migr_state = 1; mvol->migr_type = INTEL_MT_REBUILD; } else if (state == G_RAID_SUBDISK_S_RESYNC) { mvol->migr_state = 1; /* mvol->migr_type = INTEL_MT_REPAIR; */ mvol->migr_type = INTEL_MT_VERIFY; mvol->state |= INTEL_ST_VERIFY_AND_FIX; } else mvol->migr_state = 0; mvol->dirty = (vol->v_dirty || stale); mmap0 = intel_get_map(mvol, 0); /* Write map / common part of two maps. */ intel_set_map_offset(mmap0, sd->sd_offset / sectorsize); intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize); mmap0->strip_sectors = vol->v_strip_size / sectorsize; if (vol->v_state == G_RAID_VOLUME_S_BROKEN) mmap0->status = INTEL_S_FAILURE; else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED) mmap0->status = INTEL_S_DEGRADED; else if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED) == g_raid_nsubdisks(vol, -1)) mmap0->status = INTEL_S_UNINITIALIZED; else mmap0->status = INTEL_S_READY; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) mmap0->type = INTEL_T_RAID0; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) mmap0->type = INTEL_T_RAID1; else mmap0->type = INTEL_T_RAID5; mmap0->total_disks = vol->v_disks_count; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) mmap0->total_domains = vol->v_disks_count; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) mmap0->total_domains = 2; else mmap0->total_domains = 1; intel_set_map_stripe_count(mmap0, sd->sd_size / vol->v_strip_size / mmap0->total_domains); mmap0->failed_disk_num = 0xff; mmap0->ddf = 1; /* If there are two maps - copy common and update. */ if (mvol->migr_state) { intel_set_vol_curr_migr_unit(mvol, pos / vol->v_strip_size / mmap0->total_domains); mmap1 = intel_get_map(mvol, 1); memcpy(mmap1, mmap0, sizeof(struct intel_raid_map)); mmap0->status = INTEL_S_READY; } else mmap1 = NULL; /* Write disk indexes and put rebuild flags. 
*/ for (sdi = 0; sdi < vol->v_disks_count; sdi++) { sd = &vol->v_subdisks[sdi]; pd = (struct g_raid_md_intel_perdisk *) sd->sd_disk->d_md_data; mmap0->disk_idx[sdi] = pd->pd_disk_pos; if (mvol->migr_state) mmap1->disk_idx[sdi] = pd->pd_disk_pos; if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { mmap1->disk_idx[sdi] |= INTEL_DI_RBLD; } else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE && sd->sd_state != G_RAID_SUBDISK_S_STALE && sd->sd_state != G_RAID_SUBDISK_S_UNINITIALIZED) { mmap0->disk_idx[sdi] |= INTEL_DI_RBLD; if (mvol->migr_state) mmap1->disk_idx[sdi] |= INTEL_DI_RBLD; } if ((sd->sd_state == G_RAID_SUBDISK_S_NONE || sd->sd_state == G_RAID_SUBDISK_S_FAILED || sd->sd_state == G_RAID_SUBDISK_S_REBUILD) && mmap0->failed_disk_num == 0xff) { mmap0->failed_disk_num = sdi; if (mvol->migr_state) mmap1->failed_disk_num = sdi; } } vi++; } meta->total_volumes = vi; if (vi > 1 || meta->attributes & (INTEL_ATTR_EXT_STRIP | INTEL_ATTR_2TB_DISK | INTEL_ATTR_2TB)) version = INTEL_VERSION_1300; if (strcmp(version, INTEL_VERSION_1300) < 0) meta->attributes &= INTEL_ATTR_CHECKSUM; memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1); /* We are done. Print meta data and store them to disks. */ g_raid_md_intel_print(meta); if (mdi->mdio_meta != NULL) free(mdi->mdio_meta, M_MD_INTEL); mdi->mdio_meta = meta; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; if (disk->d_state != G_RAID_DISK_S_ACTIVE) continue; if (pd->pd_meta != NULL) { free(pd->pd_meta, M_MD_INTEL); pd->pd_meta = NULL; } pd->pd_meta = intel_meta_copy(meta); intel_meta_write(disk->d_consumer, meta); } return (0); } static int g_raid_md_fail_disk_intel(struct g_raid_md_object *md, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_md_intel_object *mdi; struct g_raid_md_intel_perdisk *pd; struct g_raid_subdisk *sd; sc = md->mdo_softc; mdi = (struct g_raid_md_intel_object *)md; pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data; /* We can't fail disk that is not a part of array now. */ if (pd->pd_disk_pos < 0) return (-1); /* * Mark disk as failed in metadata and try to write that metadata * to the disk itself to prevent it's later resurrection as STALE. */ mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED; pd->pd_disk_meta.flags = INTEL_F_FAILED; g_raid_md_intel_print(mdi->mdio_meta); if (tdisk->d_consumer) intel_meta_write(tdisk->d_consumer, mdi->mdio_meta); /* Change states. */ g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, G_RAID_EVENT_SUBDISK); } /* Write updated metadata to remaining disks. */ g_raid_md_write_intel(md, NULL, NULL, tdisk); /* Check if anything left except placeholders. 
*/ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_intel_refill(sc); return (0); } static int g_raid_md_free_disk_intel(struct g_raid_md_object *md, struct g_raid_disk *disk) { struct g_raid_md_intel_perdisk *pd; pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; if (pd->pd_meta != NULL) { free(pd->pd_meta, M_MD_INTEL); pd->pd_meta = NULL; } free(pd, M_MD_INTEL); disk->d_md_data = NULL; return (0); } static int g_raid_md_free_volume_intel(struct g_raid_md_object *md, struct g_raid_volume *vol) { struct g_raid_md_intel_pervolume *pv; pv = (struct g_raid_md_intel_pervolume *)vol->v_md_data; free(pv, M_MD_INTEL); vol->v_md_data = NULL; return (0); } static int g_raid_md_free_intel(struct g_raid_md_object *md) { struct g_raid_md_intel_object *mdi; mdi = (struct g_raid_md_intel_object *)md; if (!mdi->mdio_started) { mdi->mdio_started = 0; callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, md->mdo_softc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } if (mdi->mdio_meta != NULL) { free(mdi->mdio_meta, M_MD_INTEL); mdi->mdio_meta = NULL; } return (0); } G_RAID_MD_DECLARE(intel, "Intel"); Index: head/sys/geom/raid/md_jmicron.c =================================================================== --- head/sys/geom/raid/md_jmicron.c (revision 350693) +++ head/sys/geom/raid/md_jmicron.c (revision 350694) @@ -1,1565 +1,1566 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * Copyright (c) 2000 - 2008 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_md_if.h" static MALLOC_DEFINE(M_MD_JMICRON, "md_jmicron_data", "GEOM_RAID JMicron metadata"); #define JMICRON_MAX_DISKS 8 #define JMICRON_MAX_SPARE 2 struct jmicron_raid_conf { u_int8_t signature[2]; #define JMICRON_MAGIC "JM" u_int16_t version; #define JMICRON_VERSION 0x0001 u_int16_t checksum; u_int8_t filler_1[10]; u_int32_t disk_id; u_int32_t offset; u_int32_t disk_sectors_high; u_int16_t disk_sectors_low; u_int8_t filler_2[2]; u_int8_t name[16]; u_int8_t type; #define JMICRON_T_RAID0 0 #define JMICRON_T_RAID1 1 #define JMICRON_T_RAID01 2 #define JMICRON_T_CONCAT 3 #define JMICRON_T_RAID5 5 u_int8_t stripe_shift; u_int16_t flags; #define JMICRON_F_READY 0x0001 #define JMICRON_F_BOOTABLE 0x0002 #define JMICRON_F_BADSEC 0x0004 #define JMICRON_F_ACTIVE 0x0010 #define JMICRON_F_UNSYNC 0x0020 #define JMICRON_F_NEWEST 0x0040 u_int8_t filler_3[4]; u_int32_t spare[JMICRON_MAX_SPARE]; u_int32_t disks[JMICRON_MAX_DISKS]; #define JMICRON_DISK_MASK 0xFFFFFFF0 #define JMICRON_SEG_MASK 0x0000000F u_int8_t filler_4[32]; u_int8_t filler_5[384]; }; struct g_raid_md_jmicron_perdisk { struct jmicron_raid_conf *pd_meta; int pd_disk_pos; int pd_disk_id; off_t pd_disk_size; }; struct g_raid_md_jmicron_object { struct g_raid_md_object mdio_base; uint32_t mdio_config_id; struct jmicron_raid_conf *mdio_meta; struct callout mdio_start_co; /* STARTING state timer. */ int mdio_total_disks; int mdio_disks_present; int mdio_started; int mdio_incomplete; struct root_hold_token *mdio_rootmount; /* Root mount delay token. */ }; static g_raid_md_create_t g_raid_md_create_jmicron; static g_raid_md_taste_t g_raid_md_taste_jmicron; static g_raid_md_event_t g_raid_md_event_jmicron; static g_raid_md_ctl_t g_raid_md_ctl_jmicron; static g_raid_md_write_t g_raid_md_write_jmicron; static g_raid_md_fail_disk_t g_raid_md_fail_disk_jmicron; static g_raid_md_free_disk_t g_raid_md_free_disk_jmicron; static g_raid_md_free_t g_raid_md_free_jmicron; static kobj_method_t g_raid_md_jmicron_methods[] = { KOBJMETHOD(g_raid_md_create, g_raid_md_create_jmicron), KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_jmicron), KOBJMETHOD(g_raid_md_event, g_raid_md_event_jmicron), KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_jmicron), KOBJMETHOD(g_raid_md_write, g_raid_md_write_jmicron), KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_jmicron), KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_jmicron), KOBJMETHOD(g_raid_md_free, g_raid_md_free_jmicron), { 0, 0 } }; static struct g_raid_md_class g_raid_md_jmicron_class = { "JMicron", g_raid_md_jmicron_methods, sizeof(struct g_raid_md_jmicron_object), .mdc_enable = 1, .mdc_priority = 100 }; static void g_raid_md_jmicron_print(struct jmicron_raid_conf *meta) { int k; if (g_raid_debug < 1) return; printf("********* ATA JMicron RAID Metadata *********\n"); printf("signature <%c%c>\n", meta->signature[0], meta->signature[1]); printf("version %04x\n", meta->version); printf("checksum 0x%04x\n", meta->checksum); printf("disk_id 0x%08x\n", meta->disk_id); printf("offset 0x%08x\n", meta->offset); printf("disk_sectors_high 0x%08x\n", meta->disk_sectors_high); printf("disk_sectors_low 0x%04x\n", meta->disk_sectors_low); printf("name <%.16s>\n", meta->name); printf("type %d\n", meta->type); printf("stripe_shift %d\n", meta->stripe_shift); printf("flags %04x\n", meta->flags); printf("spare "); for (k = 0; k < 
JMICRON_MAX_SPARE; k++) printf(" 0x%08x", meta->spare[k]); printf("\n"); printf("disks "); for (k = 0; k < JMICRON_MAX_DISKS; k++) printf(" 0x%08x", meta->disks[k]); printf("\n"); printf("=================================================\n"); } static struct jmicron_raid_conf * jmicron_meta_copy(struct jmicron_raid_conf *meta) { struct jmicron_raid_conf *nmeta; nmeta = malloc(sizeof(*meta), M_MD_JMICRON, M_WAITOK); memcpy(nmeta, meta, sizeof(*meta)); return (nmeta); } static int jmicron_meta_total_disks(struct jmicron_raid_conf *meta) { int pos; for (pos = 0; pos < JMICRON_MAX_DISKS; pos++) { if (meta->disks[pos] == 0) break; } return (pos); } static int jmicron_meta_total_spare(struct jmicron_raid_conf *meta) { int pos, n; n = 0; for (pos = 0; pos < JMICRON_MAX_SPARE; pos++) { if (meta->spare[pos] != 0) n++; } return (n); } /* * Generate fake Configuration ID based on disk IDs. * Note: it will change after each disk set change. */ static uint32_t jmicron_meta_config_id(struct jmicron_raid_conf *meta) { int pos; uint32_t config_id; config_id = 0; for (pos = 0; pos < JMICRON_MAX_DISKS; pos++) config_id += meta->disks[pos] << pos; return (config_id); } static void jmicron_meta_get_name(struct jmicron_raid_conf *meta, char *buf) { int i; strncpy(buf, meta->name, 16); buf[16] = 0; for (i = 15; i >= 0; i--) { if (buf[i] > 0x20) break; buf[i] = 0; } } static void jmicron_meta_put_name(struct jmicron_raid_conf *meta, char *buf) { memset(meta->name, 0x20, 16); memcpy(meta->name, buf, MIN(strlen(buf), 16)); } static int jmicron_meta_find_disk(struct jmicron_raid_conf *meta, uint32_t id) { int pos; id &= JMICRON_DISK_MASK; for (pos = 0; pos < JMICRON_MAX_DISKS; pos++) { if ((meta->disks[pos] & JMICRON_DISK_MASK) == id) return (pos); } for (pos = 0; pos < JMICRON_MAX_SPARE; pos++) { if ((meta->spare[pos] & JMICRON_DISK_MASK) == id) return (-3); } return (-1); } static struct jmicron_raid_conf * jmicron_meta_read(struct g_consumer *cp) { struct g_provider *pp; struct jmicron_raid_conf *meta; char *buf; int error, i; uint16_t checksum, *ptr; pp = cp->provider; /* Read the anchor sector. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) { G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", pp->name, error); return (NULL); } meta = (struct jmicron_raid_conf *)buf; /* Check if this is an JMicron RAID struct */ if (strncmp(meta->signature, JMICRON_MAGIC, strlen(JMICRON_MAGIC))) { G_RAID_DEBUG(1, "JMicron signature check failed on %s", pp->name); g_free(buf); return (NULL); } meta = malloc(sizeof(*meta), M_MD_JMICRON, M_WAITOK); memcpy(meta, buf, min(sizeof(*meta), pp->sectorsize)); g_free(buf); /* Check metadata checksum. */ for (checksum = 0, ptr = (uint16_t *)meta, i = 0; i < 64; i++) checksum += *ptr++; if (checksum != 0) { G_RAID_DEBUG(1, "JMicron checksum check failed on %s", pp->name); free(meta, M_MD_JMICRON); return (NULL); } return (meta); } static int jmicron_meta_write(struct g_consumer *cp, struct jmicron_raid_conf *meta) { struct g_provider *pp; char *buf; int error, i; uint16_t checksum, *ptr; pp = cp->provider; /* Recalculate checksum for case if metadata were changed. */ meta->checksum = 0; for (checksum = 0, ptr = (uint16_t *)meta, i = 0; i < 64; i++) checksum += *ptr++; meta->checksum -= checksum; /* Create and fill buffer. 
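The reader above validates the JMicron header by summing its first 64 16-bit words and requiring the result to wrap to zero; the writer makes that hold by storing the negated sum of the remaining words in the checksum field. A minimal standalone sketch:

#include <stdint.h>

/*
 * Sum of the first 64 16-bit words (128 bytes) of the metadata sector.
 * Valid metadata sums to 0; the writer stores -sum(other words) in the
 * checksum field to make that true.
 */
static uint16_t
jm_checksum(const uint16_t *words)
{
	uint16_t sum = 0;
	int i;

	for (i = 0; i < 64; i++)
		sum += words[i];
	return (sum);
}

Over a freshly written header jm_checksum() returns 0; any single-bit error within those 128 bytes changes one word and therefore makes the sum non-zero.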
*/ buf = malloc(pp->sectorsize, M_MD_JMICRON, M_WAITOK | M_ZERO); memcpy(buf, meta, sizeof(*meta)); error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", pp->name, error); } free(buf, M_MD_JMICRON); return (error); } static int jmicron_meta_erase(struct g_consumer *cp) { struct g_provider *pp; char *buf; int error; pp = cp->provider; buf = malloc(pp->sectorsize, M_MD_JMICRON, M_WAITOK | M_ZERO); error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", pp->name, error); } free(buf, M_MD_JMICRON); return (error); } static struct g_raid_disk * g_raid_md_jmicron_get_disk(struct g_raid_softc *sc, int id) { struct g_raid_disk *disk; struct g_raid_md_jmicron_perdisk *pd; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; if (pd->pd_disk_pos == id) break; } return (disk); } static int g_raid_md_jmicron_supported(int level, int qual, int disks, int force) { if (disks > 8) return (0); switch (level) { case G_RAID_VOLUME_RL_RAID0: if (disks < 1) return (0); if (!force && (disks < 2 || disks > 6)) return (0); break; case G_RAID_VOLUME_RL_RAID1: if (disks < 1) return (0); if (!force && (disks != 2)) return (0); break; case G_RAID_VOLUME_RL_RAID1E: if (disks < 2) return (0); if (!force && (disks != 4)) return (0); break; case G_RAID_VOLUME_RL_SINGLE: if (disks != 1) return (0); if (!force) return (0); break; case G_RAID_VOLUME_RL_CONCAT: if (disks < 2) return (0); break; case G_RAID_VOLUME_RL_RAID5: if (disks < 3) return (0); if (qual != G_RAID_VOLUME_RLQ_R5LA) return (0); if (!force) return (0); break; default: return (0); } if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE) return (0); return (1); } static int g_raid_md_jmicron_start_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *tmpsd; struct g_raid_disk *olddisk, *tmpdisk; struct g_raid_md_object *md; struct g_raid_md_jmicron_object *mdi; struct g_raid_md_jmicron_perdisk *pd, *oldpd; struct jmicron_raid_conf *meta; int disk_pos, resurrection = 0; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_jmicron_object *)md; meta = mdi->mdio_meta; pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; olddisk = NULL; /* Find disk position in metadata by its serial. */ if (pd->pd_meta != NULL) disk_pos = jmicron_meta_find_disk(meta, pd->pd_disk_id); else disk_pos = -1; if (disk_pos < 0) { G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk"); /* If we are in the start process, that's all for now. */ if (!mdi->mdio_started) goto nofit; /* * If we have already started - try to get use of the disk. * Try to replace OFFLINE disks first, then FAILED. */ TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) { if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE && tmpdisk->d_state != G_RAID_DISK_S_FAILED) continue; /* Make sure this disk is big enough. 
*/ TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) { if (sd->sd_offset + sd->sd_size + 512 > pd->pd_disk_size) { G_RAID_DEBUG1(1, sc, "Disk too small (%ju < %ju)", pd->pd_disk_size, sd->sd_offset + sd->sd_size + 512); break; } } if (sd != NULL) continue; if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) { olddisk = tmpdisk; break; } else if (olddisk == NULL) olddisk = tmpdisk; } if (olddisk == NULL) { nofit: if (disk_pos == -3 || pd->pd_disk_pos == -3) { g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); return (1); } else { g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); return (0); } } oldpd = (struct g_raid_md_jmicron_perdisk *)olddisk->d_md_data; disk_pos = oldpd->pd_disk_pos; resurrection = 1; } if (olddisk == NULL) { /* Find placeholder by position. */ olddisk = g_raid_md_jmicron_get_disk(sc, disk_pos); if (olddisk == NULL) panic("No disk at position %d!", disk_pos); if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) { G_RAID_DEBUG1(1, sc, "More than one disk for pos %d", disk_pos); g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); return (0); } oldpd = (struct g_raid_md_jmicron_perdisk *)olddisk->d_md_data; } /* Replace failed disk or placeholder with new disk. */ TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) { TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next); TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); sd->sd_disk = disk; } oldpd->pd_disk_pos = -2; pd->pd_disk_pos = disk_pos; /* Update global metadata just in case. */ meta->disks[disk_pos] = pd->pd_disk_id; /* If it was placeholder -- destroy it. */ if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) { g_raid_destroy_disk(olddisk); } else { /* Otherwise, make it STALE_FAILED. */ g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED); } /* Welcome the new disk. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { /* * Different disks may have different sizes/offsets, * especially in concat mode. Update. */ if (!resurrection) { sd->sd_offset = (off_t)pd->pd_meta->offset * 16 * 512; //ZZZ sd->sd_size = (((off_t)pd->pd_meta->disk_sectors_high << 16) + pd->pd_meta->disk_sectors_low) * 512; } if (resurrection) { /* Stale disk, almost same as new. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if ((meta->flags & JMICRON_F_BADSEC) != 0 && (pd->pd_meta->flags & JMICRON_F_BADSEC) == 0) { /* Cold-inserted or rebuilding disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (pd->pd_meta->flags & JMICRON_F_UNSYNC) { /* Dirty or resyncing disk.. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_STALE); } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } /* Update status of our need for spare. */ if (mdi->mdio_started) { mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < mdi->mdio_total_disks); } return (resurrection); } static void g_disk_md_jmicron_retaste(void *arg, int pending) { G_RAID_DEBUG(1, "Array is not complete, trying to retaste."); g_retaste(&g_raid_class); free(arg, M_MD_JMICRON); } static void g_raid_md_jmicron_refill(struct g_raid_softc *sc) { struct g_raid_md_object *md; struct g_raid_md_jmicron_object *mdi; struct g_raid_disk *disk; struct task *task; int update, na; md = sc->sc_md; mdi = (struct g_raid_md_jmicron_object *)md; update = 0; do { /* Make sure we miss anything. 
*/ na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE); if (na == mdi->mdio_total_disks) break; G_RAID_DEBUG1(1, md->mdo_softc, "Array is not complete (%d of %d), " "trying to refill.", na, mdi->mdio_total_disks); /* Try to get use some of STALE disks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_STALE) { update += g_raid_md_jmicron_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_ACTIVE) break; } } if (disk != NULL) continue; /* Try to get use some of SPARE disks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_SPARE) { update += g_raid_md_jmicron_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_ACTIVE) break; } } } while (disk != NULL); /* Write new metadata if we changed something. */ if (update) g_raid_md_write_jmicron(md, NULL, NULL, NULL); /* Update status of our need for spare. */ mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < mdi->mdio_total_disks); /* Request retaste hoping to find spare. */ if (mdi->mdio_incomplete) { task = malloc(sizeof(struct task), M_MD_JMICRON, M_WAITOK | M_ZERO); TASK_INIT(task, 0, g_disk_md_jmicron_retaste, task); taskqueue_enqueue(taskqueue_swi, task); } } static void g_raid_md_jmicron_start(struct g_raid_softc *sc) { struct g_raid_md_object *md; struct g_raid_md_jmicron_object *mdi; struct g_raid_md_jmicron_perdisk *pd; struct jmicron_raid_conf *meta; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; off_t size; int j, disk_pos; char buf[17]; md = sc->sc_md; mdi = (struct g_raid_md_jmicron_object *)md; meta = mdi->mdio_meta; /* Create volumes and subdisks. */ jmicron_meta_get_name(meta, buf); vol = g_raid_create_volume(sc, buf, -1); size = ((off_t)meta->disk_sectors_high << 16) + meta->disk_sectors_low; size *= 512; //ZZZ vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; if (meta->type == JMICRON_T_RAID0) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; vol->v_mediasize = size * mdi->mdio_total_disks; } else if (meta->type == JMICRON_T_RAID1) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; vol->v_mediasize = size; } else if (meta->type == JMICRON_T_RAID01) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; vol->v_mediasize = size * mdi->mdio_total_disks / 2; } else if (meta->type == JMICRON_T_CONCAT) { if (mdi->mdio_total_disks == 1) vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; else vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT; vol->v_mediasize = 0; } else if (meta->type == JMICRON_T_RAID5) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA; vol->v_mediasize = size * (mdi->mdio_total_disks - 1); } else { vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; vol->v_mediasize = 0; } vol->v_strip_size = 1024 << meta->stripe_shift; //ZZZ vol->v_disks_count = mdi->mdio_total_disks; vol->v_sectorsize = 512; //ZZZ for (j = 0; j < vol->v_disks_count; j++) { sd = &vol->v_subdisks[j]; sd->sd_offset = (off_t)meta->offset * 16 * 512; //ZZZ sd->sd_size = size; } g_raid_start_volume(vol); /* Create disk placeholders to store data for later writing. */ for (disk_pos = 0; disk_pos < mdi->mdio_total_disks; disk_pos++) { pd = malloc(sizeof(*pd), M_MD_JMICRON, M_WAITOK | M_ZERO); pd->pd_disk_pos = disk_pos; pd->pd_disk_id = meta->disks[disk_pos]; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_state = G_RAID_DISK_S_OFFLINE; sd = &vol->v_subdisks[disk_pos]; sd->sd_disk = disk; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); } /* Make all disks found till the moment take their places. 
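g_raid_md_jmicron_start() above decodes three packed fields of struct jmicron_raid_conf: the member size split across disk_sectors_high/low, the strip size encoded as a shift, and the data offset stored in 16-sector units. A standalone sketch of the decoding, assuming 512-byte sectors as the driver does:

#include <stdint.h>

static void
jm_decode(uint32_t sectors_high, uint16_t sectors_low, uint8_t stripe_shift,
    uint32_t offset, uint64_t *bytes, uint64_t *strip, uint64_t *start)
{
	uint64_t sectors = ((uint64_t)sectors_high << 16) + sectors_low;

	*bytes = sectors * 512;			/* member size in bytes */
	*strip = (uint64_t)1024 << stripe_shift;	/* strip size */
	*start = (uint64_t)offset * 16 * 512;	/* offset is in 16-sector units */
}
/*
 * E.g. stripe_shift == 7 gives a 128 KiB strip (1024 << 7 == 131072), and
 * offset == 1 places the array data 8 KiB into the disk.
 */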
*/ do { TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_NONE) { g_raid_md_jmicron_start_disk(disk); break; } } } while (disk != NULL); mdi->mdio_started = 1; G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_jmicron(md, NULL, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_jmicron_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } static void g_raid_md_jmicron_new_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_jmicron_object *mdi; struct jmicron_raid_conf *pdmeta; struct g_raid_md_jmicron_perdisk *pd; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_jmicron_object *)md; pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; pdmeta = pd->pd_meta; if (mdi->mdio_started) { if (g_raid_md_jmicron_start_disk(disk)) g_raid_md_write_jmicron(md, NULL, NULL, NULL); } else { /* * If we haven't started yet - update common metadata * to get subdisks details, avoiding data from spare disks. */ if (mdi->mdio_meta == NULL || jmicron_meta_find_disk(mdi->mdio_meta, mdi->mdio_meta->disk_id) == -3) { if (mdi->mdio_meta != NULL) free(mdi->mdio_meta, M_MD_JMICRON); mdi->mdio_meta = jmicron_meta_copy(pdmeta); mdi->mdio_total_disks = jmicron_meta_total_disks(pdmeta); } mdi->mdio_meta->flags |= pdmeta->flags & JMICRON_F_BADSEC; mdi->mdio_disks_present++; G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d+%d up)", mdi->mdio_disks_present, mdi->mdio_total_disks, jmicron_meta_total_spare(mdi->mdio_meta)); /* If we collected all needed disks - start array. */ if (mdi->mdio_disks_present == mdi->mdio_total_disks + jmicron_meta_total_spare(mdi->mdio_meta)) g_raid_md_jmicron_start(sc); } } static void g_raid_jmicron_go(void *arg) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_jmicron_object *mdi; sc = arg; md = sc->sc_md; mdi = (struct g_raid_md_jmicron_object *)md; if (!mdi->mdio_started) { G_RAID_DEBUG1(0, sc, "Force array start due to timeout."); g_raid_event_send(sc, G_RAID_NODE_E_START, 0); } } static int g_raid_md_create_jmicron(struct g_raid_md_object *md, struct g_class *mp, struct g_geom **gp) { struct g_raid_softc *sc; struct g_raid_md_jmicron_object *mdi; char name[16]; mdi = (struct g_raid_md_jmicron_object *)md; mdi->mdio_config_id = arc4random(); snprintf(name, sizeof(name), "JMicron-%08x", mdi->mdio_config_id); sc = g_raid_create_node(mp, name, md); if (sc == NULL) return (G_RAID_MD_TASTE_FAIL); md->mdo_softc = sc; *gp = sc->sc_geom; return (G_RAID_MD_TASTE_NEW); } static int g_raid_md_taste_jmicron(struct g_raid_md_object *md, struct g_class *mp, struct g_consumer *cp, struct g_geom **gp) { struct g_consumer *rcp; struct g_provider *pp; struct g_raid_md_jmicron_object *mdi, *mdi1; struct g_raid_softc *sc; struct g_raid_disk *disk; struct jmicron_raid_conf *meta; struct g_raid_md_jmicron_perdisk *pd; struct g_geom *geom; int disk_pos, result, spare, len; char name[16]; uint16_t vendor; G_RAID_DEBUG(1, "Tasting JMicron on %s", cp->provider->name); mdi = (struct g_raid_md_jmicron_object *)md; pp = cp->provider; /* Read metadata from device. 
*/ meta = NULL; g_topology_unlock(); vendor = 0xffff; len = sizeof(vendor); if (pp->geom->rank == 1) g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); meta = jmicron_meta_read(cp); g_topology_lock(); if (meta == NULL) { if (g_raid_aggressive_spare) { if (vendor == 0x197b) { G_RAID_DEBUG(1, "No JMicron metadata, forcing spare."); spare = 2; goto search; } else { G_RAID_DEBUG(1, "JMicron vendor mismatch 0x%04x != 0x197b", vendor); } } return (G_RAID_MD_TASTE_FAIL); } /* Check this disk position in obtained metadata. */ disk_pos = jmicron_meta_find_disk(meta, meta->disk_id); if (disk_pos == -1) { G_RAID_DEBUG(1, "JMicron disk_id %08x not found", meta->disk_id); goto fail1; } /* Metadata valid. Print it. */ g_raid_md_jmicron_print(meta); G_RAID_DEBUG(1, "JMicron disk position %d", disk_pos); spare = (disk_pos == -2) ? 1 : 0; search: /* Search for matching node. */ sc = NULL; mdi1 = NULL; LIST_FOREACH(geom, &mp->geom, geom) { sc = geom->softc; if (sc == NULL) continue; if (sc->sc_stopping != 0) continue; if (sc->sc_md->mdo_class != md->mdo_class) continue; mdi1 = (struct g_raid_md_jmicron_object *)sc->sc_md; if (spare == 2) { if (mdi1->mdio_incomplete) break; } else { if (mdi1->mdio_config_id == jmicron_meta_config_id(meta)) break; } } /* Found matching node. */ if (geom != NULL) { G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); result = G_RAID_MD_TASTE_EXISTING; } else if (spare) { /* Not found needy node -- left for later. */ G_RAID_DEBUG(1, "Spare is not needed at this time"); goto fail1; } else { /* Not found matching node -- create one. */ result = G_RAID_MD_TASTE_NEW; mdi->mdio_config_id = jmicron_meta_config_id(meta); snprintf(name, sizeof(name), "JMicron-%08x", mdi->mdio_config_id); sc = g_raid_create_node(mp, name, md); md->mdo_softc = sc; geom = sc->sc_geom; callout_init(&mdi->mdio_start_co, 1); callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz, g_raid_jmicron_go, sc); mdi->mdio_rootmount = root_mount_hold("GRAID-JMicron"); G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount); } /* There is no return after this point, so we close passed consumer. */ g_access(cp, -1, 0, 0); rcp = g_new_consumer(geom); rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; g_topology_unlock(); sx_xlock(&sc->sc_lock); pd = malloc(sizeof(*pd), M_MD_JMICRON, M_WAITOK | M_ZERO); pd->pd_meta = meta; if (spare == 2) { pd->pd_disk_pos = -3; pd->pd_disk_id = arc4random() & JMICRON_DISK_MASK; } else { pd->pd_disk_pos = -1; pd->pd_disk_id = meta->disk_id; } pd->pd_disk_size = pp->mediasize; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = rcp; rcp->private = disk; g_raid_get_disk_info(disk); g_raid_md_jmicron_new_disk(disk); sx_xunlock(&sc->sc_lock); g_topology_lock(); *gp = geom; return (result); fail1: free(meta, M_MD_JMICRON); return (G_RAID_MD_TASTE_FAIL); } static int g_raid_md_event_jmicron(struct g_raid_md_object *md, struct g_raid_disk *disk, u_int event) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; struct g_raid_md_jmicron_object *mdi; struct g_raid_md_jmicron_perdisk *pd; sc = md->mdo_softc; mdi = (struct g_raid_md_jmicron_object *)md; if (disk == NULL) { switch (event) { case G_RAID_NODE_E_START: if (!mdi->mdio_started) g_raid_md_jmicron_start(sc); return (0); } return (-1); } pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; switch (event) { case G_RAID_DISK_E_DISCONNECTED: /* If disk was assigned, just update statuses. 
*/ if (pd->pd_disk_pos >= 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); if (disk->d_consumer) { g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; } TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); } } else { /* Otherwise -- delete. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); } /* Write updated metadata to all disks. */ g_raid_md_write_jmicron(md, NULL, NULL, NULL); /* Check if anything left except placeholders. */ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_jmicron_refill(sc); return (0); } return (-2); } static int g_raid_md_ctl_jmicron(struct g_raid_md_object *md, struct gctl_req *req) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_jmicron_object *mdi; struct g_raid_md_jmicron_perdisk *pd; struct g_consumer *cp; struct g_provider *pp; char arg[16]; const char *verb, *volname, *levelname, *diskname; int *nargs, *force; off_t size, sectorsize, strip; intmax_t *sizearg, *striparg; int numdisks, i, len, level, qual, update; int error; sc = md->mdo_softc; mdi = (struct g_raid_md_jmicron_object *)md; verb = gctl_get_param(req, "verb", NULL); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); error = 0; if (strcmp(verb, "label") == 0) { if (*nargs < 4) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, "arg1"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } levelname = gctl_get_asciiparam(req, "arg2"); if (levelname == NULL) { gctl_error(req, "No RAID level."); return (-3); } if (strcasecmp(levelname, "RAID5") == 0) levelname = "RAID5-LA"; if (g_raid_volume_str2level(levelname, &level, &qual)) { gctl_error(req, "Unknown RAID level '%s'.", levelname); return (-4); } numdisks = *nargs - 3; force = gctl_get_paraml(req, "force", sizeof(*force)); if (!g_raid_md_jmicron_supported(level, qual, numdisks, force ? *force : 0)) { gctl_error(req, "Unsupported RAID level " "(0x%02x/0x%02x), or number of disks (%d).", level, qual, numdisks); return (-5); } /* Search for disks, connect them and probe. */ size = 0x7fffffffffffffffllu; sectorsize = 0; for (i = 0; i < numdisks; i++) { snprintf(arg, sizeof(arg), "arg%d", i + 3); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -6; break; } if (strcmp(diskname, "NONE") == 0) { cp = NULL; pp = NULL; } else { g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open '%s'.", diskname); g_topology_unlock(); error = -7; break; } pp = cp->provider; } pd = malloc(sizeof(*pd), M_MD_JMICRON, M_WAITOK | M_ZERO); pd->pd_disk_pos = i; pd->pd_disk_id = arc4random() & JMICRON_DISK_MASK; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = cp; if (cp == NULL) continue; cp->private = disk; g_topology_unlock(); g_raid_get_disk_info(disk); pd->pd_disk_size = pp->mediasize; if (size > pp->mediasize) size = pp->mediasize; if (sectorsize < pp->sectorsize) sectorsize = pp->sectorsize; } if (error != 0) return (error); if (sectorsize <= 0) { gctl_error(req, "Can't get sector size."); return (-8); } /* Reserve space for metadata. */ size -= sectorsize; /* Handle size argument. 
*/ len = sizeof(*sizearg); sizearg = gctl_get_param(req, "size", &len); if (sizearg != NULL && len == sizeof(*sizearg) && *sizearg > 0) { if (*sizearg > size) { gctl_error(req, "Size too big %lld > %lld.", (long long)*sizearg, (long long)size); return (-9); } size = *sizearg; } /* Handle strip argument. */ strip = 131072; len = sizeof(*striparg); striparg = gctl_get_param(req, "strip", &len); if (striparg != NULL && len == sizeof(*striparg) && *striparg > 0) { if (*striparg < sectorsize) { gctl_error(req, "Strip size too small."); return (-10); } if (*striparg % sectorsize != 0) { gctl_error(req, "Incorrect strip size."); return (-11); } if (strip > 65535 * sectorsize) { gctl_error(req, "Strip size too big."); return (-12); } strip = *striparg; } /* Round size down to strip or sector. */ if (level == G_RAID_VOLUME_RL_RAID1) size -= (size % sectorsize); else if (level == G_RAID_VOLUME_RL_RAID1E && (numdisks & 1) != 0) size -= (size % (2 * strip)); else size -= (size % strip); if (size <= 0) { gctl_error(req, "Size too small."); return (-13); } if (size > 0xffffffffffffllu * sectorsize) { gctl_error(req, "Size too big."); return (-14); } /* We have all we need, create things: volume, ... */ mdi->mdio_total_disks = numdisks; mdi->mdio_started = 1; vol = g_raid_create_volume(sc, volname, -1); vol->v_md_data = (void *)(intptr_t)0; vol->v_raid_level = level; vol->v_raid_level_qualifier = qual; vol->v_strip_size = strip; vol->v_disks_count = numdisks; if (level == G_RAID_VOLUME_RL_RAID0 || level == G_RAID_VOLUME_RL_CONCAT || level == G_RAID_VOLUME_RL_SINGLE) vol->v_mediasize = size * numdisks; else if (level == G_RAID_VOLUME_RL_RAID1) vol->v_mediasize = size; else if (level == G_RAID_VOLUME_RL_RAID5) vol->v_mediasize = size * (numdisks - 1); else { /* RAID1E */ vol->v_mediasize = ((size * numdisks) / strip / 2) * strip; } vol->v_sectorsize = sectorsize; g_raid_start_volume(vol); /* , and subdisks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; sd = &vol->v_subdisks[pd->pd_disk_pos]; sd->sd_disk = disk; sd->sd_offset = 0; sd->sd_size = size; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); if (sd->sd_disk->d_consumer != NULL) { g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } else { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); } } /* Write metadata based on created entities. */ G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_jmicron(md, NULL, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_jmicron_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); return (0); } if (strcmp(verb, "delete") == 0) { /* Check if some volume is still open. 
*/ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && g_raid_nopens(sc) != 0) { gctl_error(req, "Some volume is still open."); return (-4); } TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) jmicron_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); return (0); } if (strcmp(verb, "remove") == 0 || strcmp(verb, "fail") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } for (i = 1; i < *nargs; i++) { snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -2; break; } if (strncmp(diskname, "/dev/", 5) == 0) diskname += 5; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && strcmp(disk->d_consumer->provider->name, diskname) == 0) break; } if (disk == NULL) { gctl_error(req, "Disk '%s' not found.", diskname); error = -3; break; } if (strcmp(verb, "fail") == 0) { g_raid_md_fail_disk_jmicron(md, NULL, disk); continue; } pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; /* Erase metadata on deleting disk. */ jmicron_meta_erase(disk->d_consumer); /* If disk was assigned, just update statuses. */ if (pd->pd_disk_pos >= 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); } } else { /* Otherwise -- delete. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); } } /* Write updated metadata to remaining disks. */ g_raid_md_write_jmicron(md, NULL, NULL, NULL); /* Check if anything left except placeholders. */ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_jmicron_refill(sc); return (error); } if (strcmp(verb, "insert") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } update = 0; for (i = 1; i < *nargs; i++) { /* Get disk name. */ snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -3; break; } /* Try to find provider with specified name. */ g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -4; break; } pp = cp->provider; pd = malloc(sizeof(*pd), M_MD_JMICRON, M_WAITOK | M_ZERO); pd->pd_disk_pos = -3; pd->pd_disk_id = arc4random() & JMICRON_DISK_MASK; pd->pd_disk_size = pp->mediasize; disk = g_raid_create_disk(sc); disk->d_consumer = cp; disk->d_md_data = (void *)pd; cp->private = disk; g_topology_unlock(); g_raid_get_disk_info(disk); /* Welcome the "new" disk. */ update += g_raid_md_jmicron_start_disk(disk); if (disk->d_state != G_RAID_DISK_S_ACTIVE && disk->d_state != G_RAID_DISK_S_SPARE) { gctl_error(req, "Disk '%s' doesn't fit.", diskname); g_raid_destroy_disk(disk); error = -8; break; } } /* Write new metadata if we changed something. 
*/ if (update) g_raid_md_write_jmicron(md, NULL, NULL, NULL); return (error); } gctl_error(req, "Command '%s' is not supported.", verb); return (-100); } static int g_raid_md_write_jmicron(struct g_raid_md_object *md, struct g_raid_volume *tvol, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_jmicron_object *mdi; struct g_raid_md_jmicron_perdisk *pd; struct jmicron_raid_conf *meta; int i, spares; sc = md->mdo_softc; mdi = (struct g_raid_md_jmicron_object *)md; if (sc->sc_stopping == G_RAID_DESTROY_HARD) return (0); /* There is only one volume. */ vol = TAILQ_FIRST(&sc->sc_volumes); /* Fill global fields. */ meta = malloc(sizeof(*meta), M_MD_JMICRON, M_WAITOK | M_ZERO); strncpy(meta->signature, JMICRON_MAGIC, 2); meta->version = JMICRON_VERSION; jmicron_meta_put_name(meta, vol->v_name); if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) meta->type = JMICRON_T_RAID0; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) meta->type = JMICRON_T_RAID1; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) meta->type = JMICRON_T_RAID01; else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT || vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE) meta->type = JMICRON_T_CONCAT; else meta->type = JMICRON_T_RAID5; meta->stripe_shift = fls(vol->v_strip_size / 2048); meta->flags = JMICRON_F_READY | JMICRON_F_BOOTABLE; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_disk == NULL || sd->sd_disk->d_md_data == NULL) meta->disks[i] = 0xffffffff; else { pd = (struct g_raid_md_jmicron_perdisk *) sd->sd_disk->d_md_data; meta->disks[i] = pd->pd_disk_id; } if (sd->sd_state < G_RAID_SUBDISK_S_STALE) meta->flags |= JMICRON_F_BADSEC; if (vol->v_dirty) meta->flags |= JMICRON_F_UNSYNC; } /* Put spares to their slots. */ spares = 0; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; if (disk->d_state != G_RAID_DISK_S_SPARE) continue; meta->spare[spares] = pd->pd_disk_id; if (++spares >= 2) break; } /* We are done. Print meta data and store them to disks. */ if (mdi->mdio_meta != NULL) free(mdi->mdio_meta, M_MD_JMICRON); mdi->mdio_meta = meta; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; if (disk->d_state != G_RAID_DISK_S_ACTIVE && disk->d_state != G_RAID_DISK_S_SPARE) continue; if (pd->pd_meta != NULL) { free(pd->pd_meta, M_MD_JMICRON); pd->pd_meta = NULL; } pd->pd_meta = jmicron_meta_copy(meta); pd->pd_meta->disk_id = pd->pd_disk_id; if ((sd = TAILQ_FIRST(&disk->d_subdisks)) != NULL) { pd->pd_meta->offset = (sd->sd_offset / 512) / 16; pd->pd_meta->disk_sectors_high = (sd->sd_size / 512) >> 16; pd->pd_meta->disk_sectors_low = (sd->sd_size / 512) & 0xffff; if (sd->sd_state < G_RAID_SUBDISK_S_STALE) pd->pd_meta->flags &= ~JMICRON_F_BADSEC; else if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE) pd->pd_meta->flags |= JMICRON_F_UNSYNC; } G_RAID_DEBUG(1, "Writing JMicron metadata to %s", g_raid_get_diskname(disk)); g_raid_md_jmicron_print(pd->pd_meta); jmicron_meta_write(disk->d_consumer, pd->pd_meta); } return (0); } static int g_raid_md_fail_disk_jmicron(struct g_raid_md_object *md, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_md_jmicron_perdisk *pd; struct g_raid_subdisk *sd; sc = md->mdo_softc; pd = (struct g_raid_md_jmicron_perdisk *)tdisk->d_md_data; /* We can't fail disk that is not a part of array now. 
*/ if (pd->pd_disk_pos < 0) return (-1); if (tdisk->d_consumer) jmicron_meta_erase(tdisk->d_consumer); /* Change states. */ g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, G_RAID_EVENT_SUBDISK); } /* Write updated metadata to remaining disks. */ g_raid_md_write_jmicron(md, NULL, NULL, tdisk); /* Check if anything left except placeholders. */ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_jmicron_refill(sc); return (0); } static int g_raid_md_free_disk_jmicron(struct g_raid_md_object *md, struct g_raid_disk *disk) { struct g_raid_md_jmicron_perdisk *pd; pd = (struct g_raid_md_jmicron_perdisk *)disk->d_md_data; if (pd->pd_meta != NULL) { free(pd->pd_meta, M_MD_JMICRON); pd->pd_meta = NULL; } free(pd, M_MD_JMICRON); disk->d_md_data = NULL; return (0); } static int g_raid_md_free_jmicron(struct g_raid_md_object *md) { struct g_raid_md_jmicron_object *mdi; mdi = (struct g_raid_md_jmicron_object *)md; if (!mdi->mdio_started) { mdi->mdio_started = 0; callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, md->mdo_softc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } if (mdi->mdio_meta != NULL) { free(mdi->mdio_meta, M_MD_JMICRON); mdi->mdio_meta = NULL; } return (0); } G_RAID_MD_DECLARE(jmicron, "JMicron"); Index: head/sys/geom/raid/md_nvidia.c =================================================================== --- head/sys/geom/raid/md_nvidia.c (revision 350693) +++ head/sys/geom/raid/md_nvidia.c (revision 350694) @@ -1,1585 +1,1586 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Alexander Motin * Copyright (c) 2000 - 2008 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_md_if.h" static MALLOC_DEFINE(M_MD_NVIDIA, "md_nvidia_data", "GEOM_RAID NVIDIA metadata"); struct nvidia_raid_conf { uint8_t nvidia_id[8]; #define NVIDIA_MAGIC "NVIDIA " uint32_t config_size; uint32_t checksum; uint16_t version; uint8_t disk_number; uint8_t dummy_0; uint32_t total_sectors; uint32_t sector_size; uint8_t name[16]; uint8_t revision[4]; uint32_t disk_status; uint32_t magic_0; #define NVIDIA_MAGIC0 0x00640044 uint64_t volume_id[2]; uint8_t state; #define NVIDIA_S_IDLE 0 #define NVIDIA_S_INIT 2 #define NVIDIA_S_REBUILD 3 #define NVIDIA_S_UPGRADE 4 #define NVIDIA_S_SYNC 5 uint8_t array_width; uint8_t total_disks; uint8_t orig_array_width; uint16_t type; #define NVIDIA_T_RAID0 0x0080 #define NVIDIA_T_RAID1 0x0081 #define NVIDIA_T_RAID3 0x0083 #define NVIDIA_T_RAID5 0x0085 /* RLQ = 00/02? */ #define NVIDIA_T_RAID5_SYM 0x0095 /* RLQ = 03 */ #define NVIDIA_T_RAID10 0x008a #define NVIDIA_T_RAID01 0x8180 #define NVIDIA_T_CONCAT 0x00ff uint16_t dummy_3; uint32_t strip_sectors; uint32_t strip_bytes; uint32_t strip_shift; uint32_t strip_mask; uint32_t stripe_sectors; uint32_t stripe_bytes; uint32_t rebuild_lba; uint32_t orig_type; uint32_t orig_total_sectors; uint32_t status; #define NVIDIA_S_BOOTABLE 0x00000001 #define NVIDIA_S_DEGRADED 0x00000002 uint32_t filler[98]; } __packed; struct g_raid_md_nvidia_perdisk { struct nvidia_raid_conf *pd_meta; int pd_disk_pos; off_t pd_disk_size; }; struct g_raid_md_nvidia_object { struct g_raid_md_object mdio_base; uint64_t mdio_volume_id[2]; struct nvidia_raid_conf *mdio_meta; struct callout mdio_start_co; /* STARTING state timer. */ int mdio_total_disks; int mdio_disks_present; int mdio_started; int mdio_incomplete; struct root_hold_token *mdio_rootmount; /* Root mount delay token. 
*/ }; static g_raid_md_create_t g_raid_md_create_nvidia; static g_raid_md_taste_t g_raid_md_taste_nvidia; static g_raid_md_event_t g_raid_md_event_nvidia; static g_raid_md_ctl_t g_raid_md_ctl_nvidia; static g_raid_md_write_t g_raid_md_write_nvidia; static g_raid_md_fail_disk_t g_raid_md_fail_disk_nvidia; static g_raid_md_free_disk_t g_raid_md_free_disk_nvidia; static g_raid_md_free_t g_raid_md_free_nvidia; static kobj_method_t g_raid_md_nvidia_methods[] = { KOBJMETHOD(g_raid_md_create, g_raid_md_create_nvidia), KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_nvidia), KOBJMETHOD(g_raid_md_event, g_raid_md_event_nvidia), KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_nvidia), KOBJMETHOD(g_raid_md_write, g_raid_md_write_nvidia), KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_nvidia), KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_nvidia), KOBJMETHOD(g_raid_md_free, g_raid_md_free_nvidia), { 0, 0 } }; static struct g_raid_md_class g_raid_md_nvidia_class = { "NVIDIA", g_raid_md_nvidia_methods, sizeof(struct g_raid_md_nvidia_object), .mdc_enable = 1, .mdc_priority = 100 }; static int NVIDIANodeID = 1; static void g_raid_md_nvidia_print(struct nvidia_raid_conf *meta) { if (g_raid_debug < 1) return; printf("********* ATA NVIDIA RAID Metadata *********\n"); printf("nvidia_id <%.8s>\n", meta->nvidia_id); printf("config_size %u\n", meta->config_size); printf("checksum 0x%08x\n", meta->checksum); printf("version 0x%04x\n", meta->version); printf("disk_number %d\n", meta->disk_number); printf("dummy_0 0x%02x\n", meta->dummy_0); printf("total_sectors %u\n", meta->total_sectors); printf("sector_size %u\n", meta->sector_size); printf("name <%.16s>\n", meta->name); printf("revision 0x%02x%02x%02x%02x\n", meta->revision[0], meta->revision[1], meta->revision[2], meta->revision[3]); printf("disk_status 0x%08x\n", meta->disk_status); printf("magic_0 0x%08x\n", meta->magic_0); printf("volume_id 0x%016jx%016jx\n", meta->volume_id[1], meta->volume_id[0]); printf("state 0x%02x\n", meta->state); printf("array_width %u\n", meta->array_width); printf("total_disks %u\n", meta->total_disks); printf("orig_array_width %u\n", meta->orig_array_width); printf("type 0x%04x\n", meta->type); printf("dummy_3 0x%04x\n", meta->dummy_3); printf("strip_sectors %u\n", meta->strip_sectors); printf("strip_bytes %u\n", meta->strip_bytes); printf("strip_shift %u\n", meta->strip_shift); printf("strip_mask 0x%08x\n", meta->strip_mask); printf("stripe_sectors %u\n", meta->stripe_sectors); printf("stripe_bytes %u\n", meta->stripe_bytes); printf("rebuild_lba %u\n", meta->rebuild_lba); printf("orig_type 0x%04x\n", meta->orig_type); printf("orig_total_sectors %u\n", meta->orig_total_sectors); printf("status 0x%08x\n", meta->status); printf("=================================================\n"); } static struct nvidia_raid_conf * nvidia_meta_copy(struct nvidia_raid_conf *meta) { struct nvidia_raid_conf *nmeta; nmeta = malloc(sizeof(*meta), M_MD_NVIDIA, M_WAITOK); memcpy(nmeta, meta, sizeof(*meta)); return (nmeta); } static int nvidia_meta_translate_disk(struct nvidia_raid_conf *meta, int md_disk_pos) { int disk_pos; if (md_disk_pos >= 0 && meta->type == NVIDIA_T_RAID01) { disk_pos = (md_disk_pos / meta->array_width) + (md_disk_pos % meta->array_width) * meta->array_width; } else disk_pos = md_disk_pos; return (disk_pos); } static void nvidia_meta_get_name(struct nvidia_raid_conf *meta, char *buf) { int i; strncpy(buf, meta->name, 16); buf[16] = 0; for (i = 15; i >= 0; i--) { if (buf[i] > 0x20) break; buf[i] = 0; } } static void 
nvidia_meta_put_name(struct nvidia_raid_conf *meta, char *buf) { memset(meta->name, 0x20, 16); memcpy(meta->name, buf, MIN(strlen(buf), 16)); } static struct nvidia_raid_conf * nvidia_meta_read(struct g_consumer *cp) { struct g_provider *pp; struct nvidia_raid_conf *meta; char *buf; int error, i; uint32_t checksum, *ptr; pp = cp->provider; /* Read the anchor sector. */ buf = g_read_data(cp, pp->mediasize - 2 * pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) { G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", pp->name, error); return (NULL); } meta = (struct nvidia_raid_conf *)buf; /* Check if this is an NVIDIA RAID struct */ if (strncmp(meta->nvidia_id, NVIDIA_MAGIC, strlen(NVIDIA_MAGIC))) { G_RAID_DEBUG(1, "NVIDIA signature check failed on %s", pp->name); g_free(buf); return (NULL); } if (meta->config_size > 128 || meta->config_size < 30) { G_RAID_DEBUG(1, "NVIDIA metadata size looks wrong: %d", meta->config_size); g_free(buf); return (NULL); } meta = malloc(sizeof(*meta), M_MD_NVIDIA, M_WAITOK); memcpy(meta, buf, min(sizeof(*meta), pp->sectorsize)); g_free(buf); /* Check metadata checksum. */ for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < meta->config_size; i++) checksum += *ptr++; if (checksum != 0) { G_RAID_DEBUG(1, "NVIDIA checksum check failed on %s", pp->name); free(meta, M_MD_NVIDIA); return (NULL); } /* Check volume state. */ if (meta->state != NVIDIA_S_IDLE && meta->state != NVIDIA_S_INIT && meta->state != NVIDIA_S_REBUILD && meta->state != NVIDIA_S_SYNC) { G_RAID_DEBUG(1, "NVIDIA unknown state on %s (0x%02x)", pp->name, meta->state); free(meta, M_MD_NVIDIA); return (NULL); } /* Check raid type. */ if (meta->type != NVIDIA_T_RAID0 && meta->type != NVIDIA_T_RAID1 && meta->type != NVIDIA_T_RAID3 && meta->type != NVIDIA_T_RAID5 && meta->type != NVIDIA_T_RAID5_SYM && meta->type != NVIDIA_T_RAID01 && meta->type != NVIDIA_T_CONCAT) { G_RAID_DEBUG(1, "NVIDIA unknown RAID level on %s (0x%02x)", pp->name, meta->type); free(meta, M_MD_NVIDIA); return (NULL); } return (meta); } static int nvidia_meta_write(struct g_consumer *cp, struct nvidia_raid_conf *meta) { struct g_provider *pp; char *buf; int error, i; uint32_t checksum, *ptr; pp = cp->provider; /* Recalculate checksum for case if metadata were changed. */ meta->checksum = 0; for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < meta->config_size; i++) checksum += *ptr++; meta->checksum -= checksum; /* Create and fill buffer. */ buf = malloc(pp->sectorsize, M_MD_NVIDIA, M_WAITOK | M_ZERO); memcpy(buf, meta, sizeof(*meta)); /* Write metadata. 
*/ error = g_write_data(cp, pp->mediasize - 2 * pp->sectorsize, buf, pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", pp->name, error); } free(buf, M_MD_NVIDIA); return (error); } static int nvidia_meta_erase(struct g_consumer *cp) { struct g_provider *pp; char *buf; int error; pp = cp->provider; buf = malloc(pp->sectorsize, M_MD_NVIDIA, M_WAITOK | M_ZERO); error = g_write_data(cp, pp->mediasize - 2 * pp->sectorsize, buf, pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", pp->name, error); } free(buf, M_MD_NVIDIA); return (error); } static struct g_raid_disk * g_raid_md_nvidia_get_disk(struct g_raid_softc *sc, int id) { struct g_raid_disk *disk; struct g_raid_md_nvidia_perdisk *pd; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; if (pd->pd_disk_pos == id) break; } return (disk); } static int g_raid_md_nvidia_supported(int level, int qual, int disks, int force) { switch (level) { case G_RAID_VOLUME_RL_RAID0: if (disks < 1) return (0); if (!force && (disks < 2 || disks > 6)) return (0); break; case G_RAID_VOLUME_RL_RAID1: if (disks < 1) return (0); if (!force && (disks != 2)) return (0); break; case G_RAID_VOLUME_RL_RAID1E: if (disks < 2) return (0); if (disks % 2 != 0) return (0); if (!force && (disks < 4)) return (0); break; case G_RAID_VOLUME_RL_SINGLE: if (disks != 1) return (0); break; case G_RAID_VOLUME_RL_CONCAT: if (disks < 2) return (0); break; case G_RAID_VOLUME_RL_RAID5: if (disks < 3) return (0); if (qual != G_RAID_VOLUME_RLQ_R5LA && qual != G_RAID_VOLUME_RLQ_R5LS) return (0); break; default: return (0); } if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE) return (0); return (1); } static int g_raid_md_nvidia_start_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *tmpsd; struct g_raid_disk *olddisk, *tmpdisk; struct g_raid_md_object *md; struct g_raid_md_nvidia_object *mdi; struct g_raid_md_nvidia_perdisk *pd, *oldpd; struct nvidia_raid_conf *meta; int disk_pos, resurrection = 0; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_nvidia_object *)md; meta = mdi->mdio_meta; pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; olddisk = NULL; /* Find disk position in metadata by its serial. */ if (pd->pd_meta != NULL) { disk_pos = pd->pd_meta->disk_number; if (disk_pos >= meta->total_disks || mdi->mdio_started) disk_pos = -3; } else disk_pos = -3; /* For RAID0+1 we need to translate order. */ disk_pos = nvidia_meta_translate_disk(meta, disk_pos); if (disk_pos < 0) { G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk"); /* If we are in the start process, that's all for now. */ if (!mdi->mdio_started) goto nofit; /* * If we have already started - try to get use of the disk. * Try to replace OFFLINE disks first, then FAILED. */ TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) { if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE && tmpdisk->d_state != G_RAID_DISK_S_FAILED) continue; /* Make sure this disk is big enough. 
*/ TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) { if (sd->sd_offset + sd->sd_size + 2 * 512 > pd->pd_disk_size) { G_RAID_DEBUG1(1, sc, "Disk too small (%ju < %ju)", pd->pd_disk_size, sd->sd_offset + sd->sd_size + 512); break; } } if (sd != NULL) continue; if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) { olddisk = tmpdisk; break; } else if (olddisk == NULL) olddisk = tmpdisk; } if (olddisk == NULL) { nofit: g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); return (1); } oldpd = (struct g_raid_md_nvidia_perdisk *)olddisk->d_md_data; disk_pos = oldpd->pd_disk_pos; resurrection = 1; } if (olddisk == NULL) { /* Find placeholder by position. */ olddisk = g_raid_md_nvidia_get_disk(sc, disk_pos); if (olddisk == NULL) panic("No disk at position %d!", disk_pos); if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) { G_RAID_DEBUG1(1, sc, "More than one disk for pos %d", disk_pos); g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); return (0); } oldpd = (struct g_raid_md_nvidia_perdisk *)olddisk->d_md_data; } /* Replace failed disk or placeholder with new disk. */ TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) { TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next); TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); sd->sd_disk = disk; } oldpd->pd_disk_pos = -2; pd->pd_disk_pos = disk_pos; /* If it was placeholder -- destroy it. */ if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) { g_raid_destroy_disk(olddisk); } else { /* Otherwise, make it STALE_FAILED. */ g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED); } /* Welcome the new disk. */ if (resurrection) g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); else// if (pd->pd_meta->disk_status == NVIDIA_S_CURRENT || //pd->pd_meta->disk_status == NVIDIA_S_REBUILD) g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); // else // g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { /* * Different disks may have different sizes, * in concat mode. Update from real disk size. */ if (meta->type == NVIDIA_T_CONCAT) sd->sd_size = pd->pd_disk_size - 0x800 * 512; if (resurrection) { /* New or ex-spare disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (meta->state == NVIDIA_S_REBUILD && (pd->pd_meta->disk_status & 0x100)) { /* Rebuilding disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_REBUILD); sd->sd_rebuild_pos = (off_t)pd->pd_meta->rebuild_lba / meta->array_width * pd->pd_meta->sector_size; } else if (meta->state == NVIDIA_S_SYNC) { /* Resyncing/dirty disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_RESYNC); sd->sd_rebuild_pos = (off_t)pd->pd_meta->rebuild_lba / meta->array_width * pd->pd_meta->sector_size; } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } /* Update status of our need for spare. */ if (mdi->mdio_started) { mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < mdi->mdio_total_disks); } return (resurrection); } static void g_disk_md_nvidia_retaste(void *arg, int pending) { G_RAID_DEBUG(1, "Array is not complete, trying to retaste."); g_retaste(&g_raid_class); free(arg, M_MD_NVIDIA); } static void g_raid_md_nvidia_refill(struct g_raid_softc *sc) { struct g_raid_md_object *md; struct g_raid_md_nvidia_object *mdi; struct g_raid_disk *disk; struct task *task; int update, na; md = sc->sc_md; mdi = (struct g_raid_md_nvidia_object *)md; update = 0; do { /* Make sure we miss anything. 
*/ na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE); if (na == mdi->mdio_total_disks) break; G_RAID_DEBUG1(1, md->mdo_softc, "Array is not complete (%d of %d), " "trying to refill.", na, mdi->mdio_total_disks); /* Try to get use some of STALE disks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_STALE) { update += g_raid_md_nvidia_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_ACTIVE) break; } } if (disk != NULL) continue; /* Try to get use some of SPARE disks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_SPARE) { update += g_raid_md_nvidia_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_ACTIVE) break; } } } while (disk != NULL); /* Write new metadata if we changed something. */ if (update) g_raid_md_write_nvidia(md, NULL, NULL, NULL); /* Update status of our need for spare. */ mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < mdi->mdio_total_disks); /* Request retaste hoping to find spare. */ if (mdi->mdio_incomplete) { task = malloc(sizeof(struct task), M_MD_NVIDIA, M_WAITOK | M_ZERO); TASK_INIT(task, 0, g_disk_md_nvidia_retaste, task); taskqueue_enqueue(taskqueue_swi, task); } } static void g_raid_md_nvidia_start(struct g_raid_softc *sc) { struct g_raid_md_object *md; struct g_raid_md_nvidia_object *mdi; struct g_raid_md_nvidia_perdisk *pd; struct nvidia_raid_conf *meta; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; off_t size; int j, disk_pos; char buf[17]; md = sc->sc_md; mdi = (struct g_raid_md_nvidia_object *)md; meta = mdi->mdio_meta; /* Create volumes and subdisks. */ nvidia_meta_get_name(meta, buf); vol = g_raid_create_volume(sc, buf, -1); vol->v_mediasize = (off_t)meta->total_sectors * 512; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; if (meta->type == NVIDIA_T_RAID0) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; size = vol->v_mediasize / mdi->mdio_total_disks; } else if (meta->type == NVIDIA_T_RAID1) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; size = vol->v_mediasize; } else if (meta->type == NVIDIA_T_RAID01) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; size = vol->v_mediasize / (mdi->mdio_total_disks / 2); } else if (meta->type == NVIDIA_T_CONCAT) { if (mdi->mdio_total_disks == 1) vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; else vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT; size = 0; } else if (meta->type == NVIDIA_T_RAID5) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA; size = vol->v_mediasize / (mdi->mdio_total_disks - 1); } else if (meta->type == NVIDIA_T_RAID5_SYM) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LS; size = vol->v_mediasize / (mdi->mdio_total_disks - 1); } else { vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; size = 0; } vol->v_strip_size = meta->strip_sectors * 512; //ZZZ vol->v_disks_count = mdi->mdio_total_disks; vol->v_sectorsize = 512; //ZZZ for (j = 0; j < vol->v_disks_count; j++) { sd = &vol->v_subdisks[j]; sd->sd_offset = 0; sd->sd_size = size; } g_raid_start_volume(vol); /* Create disk placeholders to store data for later writing. 
*/ for (disk_pos = 0; disk_pos < mdi->mdio_total_disks; disk_pos++) { pd = malloc(sizeof(*pd), M_MD_NVIDIA, M_WAITOK | M_ZERO); pd->pd_disk_pos = disk_pos; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_state = G_RAID_DISK_S_OFFLINE; sd = &vol->v_subdisks[disk_pos]; sd->sd_disk = disk; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); } /* Make all disks found till the moment take their places. */ do { TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_NONE) { g_raid_md_nvidia_start_disk(disk); break; } } } while (disk != NULL); mdi->mdio_started = 1; G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_nvidia(md, NULL, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_nvidia_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } static void g_raid_md_nvidia_new_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_nvidia_object *mdi; struct nvidia_raid_conf *pdmeta; struct g_raid_md_nvidia_perdisk *pd; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_nvidia_object *)md; pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; pdmeta = pd->pd_meta; if (mdi->mdio_started) { if (g_raid_md_nvidia_start_disk(disk)) g_raid_md_write_nvidia(md, NULL, NULL, NULL); } else { if (mdi->mdio_meta == NULL || mdi->mdio_meta->disk_number >= mdi->mdio_meta->total_disks) { G_RAID_DEBUG1(1, sc, "Newer disk"); if (mdi->mdio_meta != NULL) free(mdi->mdio_meta, M_MD_NVIDIA); mdi->mdio_meta = nvidia_meta_copy(pdmeta); mdi->mdio_total_disks = pdmeta->total_disks; mdi->mdio_disks_present = 1; } else if (pdmeta->disk_number < mdi->mdio_meta->total_disks) { mdi->mdio_disks_present++; G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", mdi->mdio_disks_present, mdi->mdio_total_disks); } else G_RAID_DEBUG1(1, sc, "Spare disk"); /* If we collected all needed disks - start array. 
*/ if (mdi->mdio_disks_present == mdi->mdio_total_disks) g_raid_md_nvidia_start(sc); } } static void g_raid_nvidia_go(void *arg) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_nvidia_object *mdi; sc = arg; md = sc->sc_md; mdi = (struct g_raid_md_nvidia_object *)md; if (!mdi->mdio_started) { G_RAID_DEBUG1(0, sc, "Force array start due to timeout."); g_raid_event_send(sc, G_RAID_NODE_E_START, 0); } } static int g_raid_md_create_nvidia(struct g_raid_md_object *md, struct g_class *mp, struct g_geom **gp) { struct g_raid_softc *sc; struct g_raid_md_nvidia_object *mdi; char name[32]; mdi = (struct g_raid_md_nvidia_object *)md; arc4rand(&mdi->mdio_volume_id, 16, 0); snprintf(name, sizeof(name), "NVIDIA-%d", atomic_fetchadd_int(&NVIDIANodeID, 1)); sc = g_raid_create_node(mp, name, md); if (sc == NULL) return (G_RAID_MD_TASTE_FAIL); md->mdo_softc = sc; *gp = sc->sc_geom; return (G_RAID_MD_TASTE_NEW); } static int g_raid_md_taste_nvidia(struct g_raid_md_object *md, struct g_class *mp, struct g_consumer *cp, struct g_geom **gp) { struct g_consumer *rcp; struct g_provider *pp; struct g_raid_md_nvidia_object *mdi, *mdi1; struct g_raid_softc *sc; struct g_raid_disk *disk; struct nvidia_raid_conf *meta; struct g_raid_md_nvidia_perdisk *pd; struct g_geom *geom; int result, spare, len; char name[32]; uint16_t vendor; G_RAID_DEBUG(1, "Tasting NVIDIA on %s", cp->provider->name); mdi = (struct g_raid_md_nvidia_object *)md; pp = cp->provider; /* Read metadata from device. */ meta = NULL; g_topology_unlock(); vendor = 0xffff; len = sizeof(vendor); if (pp->geom->rank == 1) g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); meta = nvidia_meta_read(cp); g_topology_lock(); if (meta == NULL) { if (g_raid_aggressive_spare) { if (vendor == 0x10de) { G_RAID_DEBUG(1, "No NVIDIA metadata, forcing spare."); spare = 2; goto search; } else { G_RAID_DEBUG(1, "NVIDIA vendor mismatch 0x%04x != 0x10de", vendor); } } return (G_RAID_MD_TASTE_FAIL); } /* Metadata valid. Print it. */ g_raid_md_nvidia_print(meta); G_RAID_DEBUG(1, "NVIDIA disk position %d", meta->disk_number); spare = 0;//(meta->type == NVIDIA_T_SPARE) ? 1 : 0; search: /* Search for matching node. */ sc = NULL; mdi1 = NULL; LIST_FOREACH(geom, &mp->geom, geom) { sc = geom->softc; if (sc == NULL) continue; if (sc->sc_stopping != 0) continue; if (sc->sc_md->mdo_class != md->mdo_class) continue; mdi1 = (struct g_raid_md_nvidia_object *)sc->sc_md; if (spare) { if (mdi1->mdio_incomplete) break; } else { if (memcmp(&mdi1->mdio_volume_id, &meta->volume_id, 16) == 0) break; } } /* Found matching node. */ if (geom != NULL) { G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); result = G_RAID_MD_TASTE_EXISTING; } else if (spare) { /* Not found needy node -- left for later. */ G_RAID_DEBUG(1, "Spare is not needed at this time"); goto fail1; } else { /* Not found matching node -- create one. */ result = G_RAID_MD_TASTE_NEW; memcpy(&mdi->mdio_volume_id, &meta->volume_id, 16); snprintf(name, sizeof(name), "NVIDIA-%d", atomic_fetchadd_int(&NVIDIANodeID, 1)); sc = g_raid_create_node(mp, name, md); md->mdo_softc = sc; geom = sc->sc_geom; callout_init(&mdi->mdio_start_co, 1); callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz, g_raid_nvidia_go, sc); mdi->mdio_rootmount = root_mount_hold("GRAID-NVIDIA"); G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount); } /* There is no return after this point, so we close passed consumer. 
*/ g_access(cp, -1, 0, 0); rcp = g_new_consumer(geom); rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; g_topology_unlock(); sx_xlock(&sc->sc_lock); pd = malloc(sizeof(*pd), M_MD_NVIDIA, M_WAITOK | M_ZERO); pd->pd_meta = meta; if (spare == 2) { pd->pd_disk_pos = -3; } else { pd->pd_disk_pos = -1; } pd->pd_disk_size = pp->mediasize; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = rcp; rcp->private = disk; g_raid_get_disk_info(disk); g_raid_md_nvidia_new_disk(disk); sx_xunlock(&sc->sc_lock); g_topology_lock(); *gp = geom; return (result); fail1: free(meta, M_MD_NVIDIA); return (G_RAID_MD_TASTE_FAIL); } static int g_raid_md_event_nvidia(struct g_raid_md_object *md, struct g_raid_disk *disk, u_int event) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; struct g_raid_md_nvidia_object *mdi; struct g_raid_md_nvidia_perdisk *pd; sc = md->mdo_softc; mdi = (struct g_raid_md_nvidia_object *)md; if (disk == NULL) { switch (event) { case G_RAID_NODE_E_START: if (!mdi->mdio_started) { /* Bump volume ID to drop missing disks. */ arc4rand(&mdi->mdio_volume_id, 16, 0); g_raid_md_nvidia_start(sc); } return (0); } return (-1); } pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; switch (event) { case G_RAID_DISK_E_DISCONNECTED: /* If disk was assigned, just update statuses. */ if (pd->pd_disk_pos >= 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); if (disk->d_consumer) { g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; } TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); } } else { /* Otherwise -- delete. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); } if (mdi->mdio_started) { /* Bump volume ID to prevent disk resurrection. */ if (pd->pd_disk_pos >= 0) arc4rand(&mdi->mdio_volume_id, 16, 0); /* Write updated metadata to all disks. */ g_raid_md_write_nvidia(md, NULL, NULL, NULL); } /* Check if anything left except placeholders. 
*/ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_nvidia_refill(sc); return (0); } return (-2); } static int g_raid_md_ctl_nvidia(struct g_raid_md_object *md, struct gctl_req *req) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_nvidia_object *mdi; struct g_raid_md_nvidia_perdisk *pd; struct g_consumer *cp; struct g_provider *pp; char arg[16]; const char *verb, *volname, *levelname, *diskname; int *nargs, *force; off_t size, sectorsize, strip, volsize; intmax_t *sizearg, *striparg; int numdisks, i, len, level, qual, update; int error; sc = md->mdo_softc; mdi = (struct g_raid_md_nvidia_object *)md; verb = gctl_get_param(req, "verb", NULL); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); error = 0; if (strcmp(verb, "label") == 0) { if (*nargs < 4) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, "arg1"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } levelname = gctl_get_asciiparam(req, "arg2"); if (levelname == NULL) { gctl_error(req, "No RAID level."); return (-3); } if (strcasecmp(levelname, "RAID5") == 0) levelname = "RAID5-LS"; if (g_raid_volume_str2level(levelname, &level, &qual)) { gctl_error(req, "Unknown RAID level '%s'.", levelname); return (-4); } numdisks = *nargs - 3; force = gctl_get_paraml(req, "force", sizeof(*force)); if (!g_raid_md_nvidia_supported(level, qual, numdisks, force ? *force : 0)) { gctl_error(req, "Unsupported RAID level " "(0x%02x/0x%02x), or number of disks (%d).", level, qual, numdisks); return (-5); } /* Search for disks, connect them and probe. */ size = 0x7fffffffffffffffllu; sectorsize = 0; for (i = 0; i < numdisks; i++) { snprintf(arg, sizeof(arg), "arg%d", i + 3); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -6; break; } if (strcmp(diskname, "NONE") == 0) { cp = NULL; pp = NULL; } else { g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open '%s'.", diskname); g_topology_unlock(); error = -7; break; } pp = cp->provider; } pd = malloc(sizeof(*pd), M_MD_NVIDIA, M_WAITOK | M_ZERO); pd->pd_disk_pos = i; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = cp; if (cp == NULL) continue; cp->private = disk; g_topology_unlock(); g_raid_get_disk_info(disk); pd->pd_disk_size = pp->mediasize; if (size > pp->mediasize) size = pp->mediasize; if (sectorsize < pp->sectorsize) sectorsize = pp->sectorsize; } if (error != 0) return (error); if (sectorsize <= 0) { gctl_error(req, "Can't get sector size."); return (-8); } /* Reserve space for metadata. */ size -= 2 * sectorsize; /* Handle size argument. */ len = sizeof(*sizearg); sizearg = gctl_get_param(req, "size", &len); if (sizearg != NULL && len == sizeof(*sizearg) && *sizearg > 0) { if (*sizearg > size) { gctl_error(req, "Size too big %lld > %lld.", (long long)*sizearg, (long long)size); return (-9); } size = *sizearg; } /* Handle strip argument. 
*/ strip = 131072; len = sizeof(*striparg); striparg = gctl_get_param(req, "strip", &len); if (striparg != NULL && len == sizeof(*striparg) && *striparg > 0) { if (*striparg < sectorsize) { gctl_error(req, "Strip size too small."); return (-10); } if (*striparg % sectorsize != 0) { gctl_error(req, "Incorrect strip size."); return (-11); } if (strip > 65535 * sectorsize) { gctl_error(req, "Strip size too big."); return (-12); } strip = *striparg; } /* Round size down to strip or sector. */ if (level == G_RAID_VOLUME_RL_RAID1) size -= (size % sectorsize); else if (level == G_RAID_VOLUME_RL_RAID1E && (numdisks & 1) != 0) size -= (size % (2 * strip)); else size -= (size % strip); if (size <= 0) { gctl_error(req, "Size too small."); return (-13); } if (level == G_RAID_VOLUME_RL_RAID0 || level == G_RAID_VOLUME_RL_CONCAT || level == G_RAID_VOLUME_RL_SINGLE) volsize = size * numdisks; else if (level == G_RAID_VOLUME_RL_RAID1) volsize = size; else if (level == G_RAID_VOLUME_RL_RAID5) volsize = size * (numdisks - 1); else { /* RAID1E */ volsize = ((size * numdisks) / strip / 2) * strip; } if (volsize > 0xffffffffllu * sectorsize) { gctl_error(req, "Size too big."); return (-14); } /* We have all we need, create things: volume, ... */ mdi->mdio_total_disks = numdisks; mdi->mdio_started = 1; vol = g_raid_create_volume(sc, volname, -1); vol->v_md_data = (void *)(intptr_t)0; vol->v_raid_level = level; vol->v_raid_level_qualifier = qual; vol->v_strip_size = strip; vol->v_disks_count = numdisks; vol->v_mediasize = volsize; vol->v_sectorsize = sectorsize; g_raid_start_volume(vol); /* , and subdisks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; sd = &vol->v_subdisks[pd->pd_disk_pos]; sd->sd_disk = disk; sd->sd_offset = 0; sd->sd_size = size; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); if (sd->sd_disk->d_consumer != NULL) { g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } else { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); } } /* Write metadata based on created entities. */ G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_nvidia(md, NULL, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_nvidia_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); return (0); } if (strcmp(verb, "delete") == 0) { /* Check if some volume is still open. 
*/ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && g_raid_nopens(sc) != 0) { gctl_error(req, "Some volume is still open."); return (-4); } TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) nvidia_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); return (0); } if (strcmp(verb, "remove") == 0 || strcmp(verb, "fail") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } for (i = 1; i < *nargs; i++) { snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -2; break; } if (strncmp(diskname, "/dev/", 5) == 0) diskname += 5; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && strcmp(disk->d_consumer->provider->name, diskname) == 0) break; } if (disk == NULL) { gctl_error(req, "Disk '%s' not found.", diskname); error = -3; break; } if (strcmp(verb, "fail") == 0) { g_raid_md_fail_disk_nvidia(md, NULL, disk); continue; } pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; /* Erase metadata on deleting disk. */ nvidia_meta_erase(disk->d_consumer); /* If disk was assigned, just update statuses. */ if (pd->pd_disk_pos >= 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); } } else { /* Otherwise -- delete. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); } } /* Write updated metadata to remaining disks. */ g_raid_md_write_nvidia(md, NULL, NULL, NULL); /* Check if anything left except placeholders. */ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_nvidia_refill(sc); return (error); } if (strcmp(verb, "insert") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } update = 0; for (i = 1; i < *nargs; i++) { /* Get disk name. */ snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -3; break; } /* Try to find provider with specified name. */ g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -4; break; } pp = cp->provider; pd = malloc(sizeof(*pd), M_MD_NVIDIA, M_WAITOK | M_ZERO); pd->pd_disk_pos = -3; pd->pd_disk_size = pp->mediasize; disk = g_raid_create_disk(sc); disk->d_consumer = cp; disk->d_md_data = (void *)pd; cp->private = disk; g_topology_unlock(); g_raid_get_disk_info(disk); /* Welcome the "new" disk. */ update += g_raid_md_nvidia_start_disk(disk); if (disk->d_state != G_RAID_DISK_S_SPARE && disk->d_state != G_RAID_DISK_S_ACTIVE) { gctl_error(req, "Disk '%s' doesn't fit.", diskname); g_raid_destroy_disk(disk); error = -8; break; } } /* Write new metadata if we changed something. 
*/ if (update) g_raid_md_write_nvidia(md, NULL, NULL, NULL); return (error); } gctl_error(req, "Command '%s' is not supported.", verb); return (-100); } static int g_raid_md_write_nvidia(struct g_raid_md_object *md, struct g_raid_volume *tvol, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_nvidia_object *mdi; struct g_raid_md_nvidia_perdisk *pd; struct nvidia_raid_conf *meta; int i, spares; sc = md->mdo_softc; mdi = (struct g_raid_md_nvidia_object *)md; if (sc->sc_stopping == G_RAID_DESTROY_HARD) return (0); /* There is only one volume. */ vol = TAILQ_FIRST(&sc->sc_volumes); /* Fill global fields. */ meta = malloc(sizeof(*meta), M_MD_NVIDIA, M_WAITOK | M_ZERO); if (mdi->mdio_meta) memcpy(meta, mdi->mdio_meta, sizeof(*meta)); memcpy(meta->nvidia_id, NVIDIA_MAGIC, sizeof(NVIDIA_MAGIC) - 1); meta->config_size = 30; meta->version = 0x0064; meta->total_sectors = vol->v_mediasize / vol->v_sectorsize; meta->sector_size = vol->v_sectorsize; nvidia_meta_put_name(meta, vol->v_name); meta->magic_0 = NVIDIA_MAGIC0; memcpy(&meta->volume_id, &mdi->mdio_volume_id, 16); meta->state = NVIDIA_S_IDLE; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) meta->array_width = 1; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) meta->array_width = vol->v_disks_count / 2; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) meta->array_width = vol->v_disks_count - 1; else meta->array_width = vol->v_disks_count; meta->total_disks = vol->v_disks_count; meta->orig_array_width = meta->array_width; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) meta->type = NVIDIA_T_RAID0; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) meta->type = NVIDIA_T_RAID1; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) meta->type = NVIDIA_T_RAID01; else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT || vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE) meta->type = NVIDIA_T_CONCAT; else if (vol->v_raid_level_qualifier == G_RAID_VOLUME_RLQ_R5LA) meta->type = NVIDIA_T_RAID5; else meta->type = NVIDIA_T_RAID5_SYM; meta->strip_sectors = vol->v_strip_size / vol->v_sectorsize; meta->strip_bytes = vol->v_strip_size; meta->strip_shift = ffs(meta->strip_sectors) - 1; meta->strip_mask = meta->strip_sectors - 1; meta->stripe_sectors = meta->strip_sectors * meta->orig_array_width; meta->stripe_bytes = meta->stripe_sectors * vol->v_sectorsize; meta->rebuild_lba = 0; meta->orig_type = meta->type; meta->orig_total_sectors = meta->total_sectors; meta->status = 0; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if ((sd->sd_state == G_RAID_SUBDISK_S_STALE || sd->sd_state == G_RAID_SUBDISK_S_RESYNC || vol->v_dirty) && meta->state != NVIDIA_S_REBUILD) meta->state = NVIDIA_S_SYNC; else if (sd->sd_state == G_RAID_SUBDISK_S_NEW || sd->sd_state == G_RAID_SUBDISK_S_REBUILD) meta->state = NVIDIA_S_REBUILD; } /* We are done. Print meta data and store them to disks. */ if (mdi->mdio_meta != NULL) free(mdi->mdio_meta, M_MD_NVIDIA); mdi->mdio_meta = meta; spares = 0; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; if (disk->d_state != G_RAID_DISK_S_ACTIVE && disk->d_state != G_RAID_DISK_S_SPARE) continue; if (pd->pd_meta != NULL) { free(pd->pd_meta, M_MD_NVIDIA); pd->pd_meta = NULL; } pd->pd_meta = nvidia_meta_copy(meta); if ((sd = TAILQ_FIRST(&disk->d_subdisks)) != NULL) { /* For RAID0+1 we need to translate order. 
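 *
 * The metadata records the members of a RAID0+1 set in a different order
 * than the volume keeps its subdisks, so the subdisk position has to be
 * mapped before it is stored as a disk number.  An analogous translation,
 * spelled out arithmetically, appears in promise_meta_translate_disk() in
 * md_promise.c further down in this change.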
*/ pd->pd_meta->disk_number = nvidia_meta_translate_disk(meta, sd->sd_pos); if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE) { pd->pd_meta->disk_status = 0x100; pd->pd_meta->rebuild_lba = sd->sd_rebuild_pos / vol->v_sectorsize * meta->array_width; } } else pd->pd_meta->disk_number = meta->total_disks + spares++; G_RAID_DEBUG(1, "Writing NVIDIA metadata to %s", g_raid_get_diskname(disk)); g_raid_md_nvidia_print(pd->pd_meta); nvidia_meta_write(disk->d_consumer, pd->pd_meta); } return (0); } static int g_raid_md_fail_disk_nvidia(struct g_raid_md_object *md, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_md_nvidia_perdisk *pd; struct g_raid_subdisk *sd; sc = md->mdo_softc; pd = (struct g_raid_md_nvidia_perdisk *)tdisk->d_md_data; /* We can't fail disk that is not a part of array now. */ if (pd->pd_disk_pos < 0) return (-1); /* Erase metadata to prevent disks's later resurrection. */ if (tdisk->d_consumer) nvidia_meta_erase(tdisk->d_consumer); /* Change states. */ g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, G_RAID_EVENT_SUBDISK); } /* Write updated metadata to remaining disks. */ g_raid_md_write_nvidia(md, NULL, NULL, tdisk); /* Check if anything left except placeholders. */ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_nvidia_refill(sc); return (0); } static int g_raid_md_free_disk_nvidia(struct g_raid_md_object *md, struct g_raid_disk *disk) { struct g_raid_md_nvidia_perdisk *pd; pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; if (pd->pd_meta != NULL) { free(pd->pd_meta, M_MD_NVIDIA); pd->pd_meta = NULL; } free(pd, M_MD_NVIDIA); disk->d_md_data = NULL; return (0); } static int g_raid_md_free_nvidia(struct g_raid_md_object *md) { struct g_raid_md_nvidia_object *mdi; mdi = (struct g_raid_md_nvidia_object *)md; if (!mdi->mdio_started) { mdi->mdio_started = 0; callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, md->mdo_softc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } if (mdi->mdio_meta != NULL) { free(mdi->mdio_meta, M_MD_NVIDIA); mdi->mdio_meta = NULL; } return (0); } G_RAID_MD_DECLARE(nvidia, "NVIDIA"); Index: head/sys/geom/raid/md_promise.c =================================================================== --- head/sys/geom/raid/md_promise.c (revision 350693) +++ head/sys/geom/raid/md_promise.c (revision 350694) @@ -1,2007 +1,2008 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Alexander Motin * Copyright (c) 2000 - 2008 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_md_if.h" static MALLOC_DEFINE(M_MD_PROMISE, "md_promise_data", "GEOM_RAID Promise metadata"); #define PROMISE_MAX_DISKS 8 #define PROMISE_MAX_SUBDISKS 2 #define PROMISE_META_OFFSET 14 struct promise_raid_disk { uint8_t flags; /* Subdisk status. */ #define PROMISE_F_VALID 0x01 #define PROMISE_F_ONLINE 0x02 #define PROMISE_F_ASSIGNED 0x04 #define PROMISE_F_SPARE 0x08 #define PROMISE_F_DUPLICATE 0x10 #define PROMISE_F_REDIR 0x20 #define PROMISE_F_DOWN 0x40 #define PROMISE_F_READY 0x80 uint8_t number; /* Position in a volume. */ uint8_t channel; /* ATA channel number. */ uint8_t device; /* ATA device number. */ uint64_t id __packed; /* Subdisk ID. */ } __packed; struct promise_raid_conf { char promise_id[24]; #define PROMISE_MAGIC "Promise Technology, Inc." #define FREEBSD_MAGIC "FreeBSD ATA driver RAID " uint32_t dummy_0; uint64_t magic_0; #define PROMISE_MAGIC0(x) (((uint64_t)(x.channel) << 48) | \ ((uint64_t)(x.device != 0) << 56)) uint16_t magic_1; uint32_t magic_2; uint8_t filler1[470]; uint32_t integrity; #define PROMISE_I_VALID 0x00000080 struct promise_raid_disk disk; /* This subdisk info. */ uint32_t disk_offset; /* Subdisk offset. */ uint32_t disk_sectors; /* Subdisk size */ uint32_t disk_rebuild; /* Rebuild position. */ uint16_t generation; /* Generation number. */ uint8_t status; /* Volume status. */ #define PROMISE_S_VALID 0x01 #define PROMISE_S_ONLINE 0x02 #define PROMISE_S_INITED 0x04 #define PROMISE_S_READY 0x08 #define PROMISE_S_DEGRADED 0x10 #define PROMISE_S_MARKED 0x20 #define PROMISE_S_MIGRATING 0x40 #define PROMISE_S_FUNCTIONAL 0x80 uint8_t type; /* Voluem type. */ #define PROMISE_T_RAID0 0x00 #define PROMISE_T_RAID1 0x01 #define PROMISE_T_RAID3 0x02 #define PROMISE_T_RAID5 0x04 #define PROMISE_T_SPAN 0x08 #define PROMISE_T_JBOD 0x10 uint8_t total_disks; /* Disks in this volume. */ uint8_t stripe_shift; /* Strip size. */ uint8_t array_width; /* Number of RAID0 stripes. */ uint8_t array_number; /* Global volume number. */ uint32_t total_sectors; /* Volume size. */ uint16_t cylinders; /* Volume geometry: C. */ uint8_t heads; /* Volume geometry: H. */ uint8_t sectors; /* Volume geometry: S. */ uint64_t volume_id __packed; /* Volume ID, */ struct promise_raid_disk disks[PROMISE_MAX_DISKS]; /* Subdisks in this volume. */ char name[32]; /* Volume label. */ uint32_t filler2[8]; uint32_t magic_3; /* Something related to rebuild. */ uint64_t rebuild_lba64; /* Per-volume rebuild position. 
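 *
 * The fields from here to the end of the structure appear to belong to a
 * newer revision of the on-disk format: the *_high words extend
 * disk_offset, disk_sectors, disk_rebuild and total_sectors to 64 bits,
 * and promise_meta_read() replaces the filler patterns that older
 * metadata leaves in them with sane values before they are used.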
*/ uint32_t magic_4; uint32_t magic_5; uint32_t total_sectors_high; uint8_t magic_6; uint8_t sector_size; uint16_t magic_7; uint32_t magic_8[31]; uint32_t backup_time; uint16_t magic_9; uint32_t disk_offset_high; uint32_t disk_sectors_high; uint32_t disk_rebuild_high; uint16_t magic_10; uint32_t magic_11[3]; uint32_t filler3[284]; uint32_t checksum; } __packed; struct g_raid_md_promise_perdisk { int pd_updated; int pd_subdisks; struct promise_raid_conf *pd_meta[PROMISE_MAX_SUBDISKS]; }; struct g_raid_md_promise_pervolume { struct promise_raid_conf *pv_meta; uint64_t pv_id; uint16_t pv_generation; int pv_disks_present; int pv_started; struct callout pv_start_co; /* STARTING state timer. */ }; static g_raid_md_create_t g_raid_md_create_promise; static g_raid_md_taste_t g_raid_md_taste_promise; static g_raid_md_event_t g_raid_md_event_promise; static g_raid_md_volume_event_t g_raid_md_volume_event_promise; static g_raid_md_ctl_t g_raid_md_ctl_promise; static g_raid_md_write_t g_raid_md_write_promise; static g_raid_md_fail_disk_t g_raid_md_fail_disk_promise; static g_raid_md_free_disk_t g_raid_md_free_disk_promise; static g_raid_md_free_volume_t g_raid_md_free_volume_promise; static g_raid_md_free_t g_raid_md_free_promise; static kobj_method_t g_raid_md_promise_methods[] = { KOBJMETHOD(g_raid_md_create, g_raid_md_create_promise), KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_promise), KOBJMETHOD(g_raid_md_event, g_raid_md_event_promise), KOBJMETHOD(g_raid_md_volume_event, g_raid_md_volume_event_promise), KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_promise), KOBJMETHOD(g_raid_md_write, g_raid_md_write_promise), KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_promise), KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_promise), KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_promise), KOBJMETHOD(g_raid_md_free, g_raid_md_free_promise), { 0, 0 } }; static struct g_raid_md_class g_raid_md_promise_class = { "Promise", g_raid_md_promise_methods, sizeof(struct g_raid_md_object), .mdc_enable = 1, .mdc_priority = 100 }; static void g_raid_md_promise_print(struct promise_raid_conf *meta) { int i; if (g_raid_debug < 1) return; printf("********* ATA Promise Metadata *********\n"); printf("promise_id <%.24s>\n", meta->promise_id); printf("disk %02x %02x %02x %02x %016jx\n", meta->disk.flags, meta->disk.number, meta->disk.channel, meta->disk.device, meta->disk.id); printf("disk_offset %u\n", meta->disk_offset); printf("disk_sectors %u\n", meta->disk_sectors); printf("disk_rebuild %u\n", meta->disk_rebuild); printf("generation %u\n", meta->generation); printf("status 0x%02x\n", meta->status); printf("type %u\n", meta->type); printf("total_disks %u\n", meta->total_disks); printf("stripe_shift %u\n", meta->stripe_shift); printf("array_width %u\n", meta->array_width); printf("array_number %u\n", meta->array_number); printf("total_sectors %u\n", meta->total_sectors); printf("cylinders %u\n", meta->cylinders); printf("heads %u\n", meta->heads); printf("sectors %u\n", meta->sectors); printf("volume_id 0x%016jx\n", meta->volume_id); printf("disks:\n"); for (i = 0; i < PROMISE_MAX_DISKS; i++ ) { printf(" %02x %02x %02x %02x %016jx\n", meta->disks[i].flags, meta->disks[i].number, meta->disks[i].channel, meta->disks[i].device, meta->disks[i].id); } printf("name <%.32s>\n", meta->name); printf("magic_3 0x%08x\n", meta->magic_3); printf("rebuild_lba64 %ju\n", meta->rebuild_lba64); printf("magic_4 0x%08x\n", meta->magic_4); printf("magic_5 0x%08x\n", meta->magic_5); printf("total_sectors_high 0x%08x\n", 
meta->total_sectors_high); printf("sector_size %u\n", meta->sector_size); printf("backup_time %d\n", meta->backup_time); printf("disk_offset_high 0x%08x\n", meta->disk_offset_high); printf("disk_sectors_high 0x%08x\n", meta->disk_sectors_high); printf("disk_rebuild_high 0x%08x\n", meta->disk_rebuild_high); printf("=================================================\n"); } static struct promise_raid_conf * promise_meta_copy(struct promise_raid_conf *meta) { struct promise_raid_conf *nmeta; nmeta = malloc(sizeof(*nmeta), M_MD_PROMISE, M_WAITOK); memcpy(nmeta, meta, sizeof(*nmeta)); return (nmeta); } static int promise_meta_find_disk(struct promise_raid_conf *meta, uint64_t id) { int pos; for (pos = 0; pos < meta->total_disks; pos++) { if (meta->disks[pos].id == id) return (pos); } return (-1); } static int promise_meta_unused_range(struct promise_raid_conf **metaarr, int nsd, off_t sectors, off_t *off, off_t *size) { off_t coff, csize, tmp; int i, j; sectors -= 131072; *off = 0; *size = 0; coff = 0; csize = sectors; i = 0; while (1) { for (j = 0; j < nsd; j++) { tmp = ((off_t)metaarr[j]->disk_offset_high << 32) + metaarr[j]->disk_offset; if (tmp >= coff) csize = MIN(csize, tmp - coff); } if (csize > *size) { *off = coff; *size = csize; } if (i >= nsd) break; coff = ((off_t)metaarr[i]->disk_offset_high << 32) + metaarr[i]->disk_offset + ((off_t)metaarr[i]->disk_sectors_high << 32) + metaarr[i]->disk_sectors; csize = sectors - coff; i++; } return ((*size > 0) ? 1 : 0); } static int promise_meta_translate_disk(struct g_raid_volume *vol, int md_disk_pos) { int disk_pos, width; if (md_disk_pos >= 0 && vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { width = vol->v_disks_count / 2; disk_pos = (md_disk_pos / width) + (md_disk_pos % width) * width; } else disk_pos = md_disk_pos; return (disk_pos); } static void promise_meta_get_name(struct promise_raid_conf *meta, char *buf) { int i; strncpy(buf, meta->name, 32); buf[32] = 0; for (i = 31; i >= 0; i--) { if (buf[i] > 0x20) break; buf[i] = 0; } } static void promise_meta_put_name(struct promise_raid_conf *meta, char *buf) { memset(meta->name, 0x20, 32); memcpy(meta->name, buf, MIN(strlen(buf), 32)); } static int promise_meta_read(struct g_consumer *cp, struct promise_raid_conf **metaarr) { struct g_provider *pp; struct promise_raid_conf *meta; char *buf; int error, i, subdisks; uint32_t checksum, *ptr; pp = cp->provider; subdisks = 0; if (pp->sectorsize * 4 > MAXPHYS) { G_RAID_DEBUG(1, "%s: Blocksize is too big.", pp->name); return (subdisks); } next: /* Read metadata block. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize * (63 - subdisks * PROMISE_META_OFFSET), pp->sectorsize * 4, &error); if (buf == NULL) { G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", pp->name, error); return (subdisks); } meta = (struct promise_raid_conf *)buf; /* Check if this is an Promise RAID struct */ if (strncmp(meta->promise_id, PROMISE_MAGIC, strlen(PROMISE_MAGIC)) && strncmp(meta->promise_id, FREEBSD_MAGIC, strlen(FREEBSD_MAGIC))) { if (subdisks == 0) G_RAID_DEBUG(1, "Promise signature check failed on %s", pp->name); g_free(buf); return (subdisks); } meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK); memcpy(meta, buf, MIN(sizeof(*meta), pp->sectorsize * 4)); g_free(buf); /* Check metadata checksum. 
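 *
 * The metadata block is treated as an array of 512 32-bit words; the last
 * word stores the sum of the preceding 511, so the check below recomputes
 * that sum and compares it with meta->checksum.  A standalone sketch of
 * the same test, for illustration only (the helper name is made up):
 *
 *	static int
 *	promise_meta_cksum_ok(const struct promise_raid_conf *meta)
 *	{
 *		const uint32_t *w = (const uint32_t *)meta;
 *		uint32_t sum = 0;
 *		int i;
 *
 *		for (i = 0; i < 511; i++)
 *			sum += w[i];
 *		return (sum == meta->checksum);
 *	}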
*/ for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++) checksum += *ptr++; if (checksum != meta->checksum) { G_RAID_DEBUG(1, "Promise checksum check failed on %s", pp->name); free(meta, M_MD_PROMISE); return (subdisks); } if ((meta->integrity & PROMISE_I_VALID) == 0) { G_RAID_DEBUG(1, "Promise metadata is invalid on %s", pp->name); free(meta, M_MD_PROMISE); return (subdisks); } if (meta->total_disks > PROMISE_MAX_DISKS) { G_RAID_DEBUG(1, "Wrong number of disks on %s (%d)", pp->name, meta->total_disks); free(meta, M_MD_PROMISE); return (subdisks); } /* Remove filler garbage from fields used in newer metadata. */ if (meta->disk_offset_high == 0x8b8c8d8e && meta->disk_sectors_high == 0x8788898a && meta->disk_rebuild_high == 0x83848586) { meta->disk_offset_high = 0; meta->disk_sectors_high = 0; if (meta->disk_rebuild == UINT32_MAX) meta->disk_rebuild_high = UINT32_MAX; else meta->disk_rebuild_high = 0; if (meta->total_sectors_high == 0x15161718) { meta->total_sectors_high = 0; meta->backup_time = 0; if (meta->rebuild_lba64 == 0x2122232425262728) meta->rebuild_lba64 = UINT64_MAX; } } if (meta->sector_size < 1 || meta->sector_size > 8) meta->sector_size = 1; /* Save this part and look for next. */ *metaarr = meta; metaarr++; subdisks++; if (subdisks < PROMISE_MAX_SUBDISKS) goto next; return (subdisks); } static int promise_meta_write(struct g_consumer *cp, struct promise_raid_conf **metaarr, int nsd) { struct g_provider *pp; struct promise_raid_conf *meta; char *buf; off_t off, size; int error, i, subdisk, fake; uint32_t checksum, *ptr; pp = cp->provider; subdisk = 0; fake = 0; next: buf = malloc(pp->sectorsize * 4, M_MD_PROMISE, M_WAITOK | M_ZERO); meta = NULL; if (subdisk < nsd) { meta = metaarr[subdisk]; } else if (!fake && promise_meta_unused_range(metaarr, nsd, cp->provider->mediasize / cp->provider->sectorsize, &off, &size)) { /* Optionally add record for unused space. */ meta = (struct promise_raid_conf *)buf; memcpy(&meta->promise_id[0], PROMISE_MAGIC, sizeof(PROMISE_MAGIC) - 1); meta->dummy_0 = 0x00020000; meta->integrity = PROMISE_I_VALID; meta->disk.flags = PROMISE_F_ONLINE | PROMISE_F_VALID; meta->disk.number = 0xff; arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0); meta->disk_offset_high = off >> 32; meta->disk_offset = (uint32_t)off; meta->disk_sectors_high = size >> 32; meta->disk_sectors = (uint32_t)size; meta->disk_rebuild_high = UINT32_MAX; meta->disk_rebuild = UINT32_MAX; fake = 1; } if (meta != NULL) { /* Recalculate checksum for case if metadata were changed. 
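 *
 * The copies handed to this writer may have been modified since they were
 * read (generation bump, status, rebuild position), and the record built
 * above for unused space never had a valid sum at all, so the checksum is
 * always rebuilt over the first 511 words; the checksum word itself sits
 * outside that range, so zeroing it first is only defensive.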
*/ meta->checksum = 0; for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++) checksum += *ptr++; meta->checksum = checksum; memcpy(buf, meta, MIN(pp->sectorsize * 4, sizeof(*meta))); } error = g_write_data(cp, pp->mediasize - pp->sectorsize * (63 - subdisk * PROMISE_META_OFFSET), buf, pp->sectorsize * 4); if (error != 0) { G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", pp->name, error); } free(buf, M_MD_PROMISE); subdisk++; if (subdisk < PROMISE_MAX_SUBDISKS) goto next; return (error); } static int promise_meta_erase(struct g_consumer *cp) { struct g_provider *pp; char *buf; int error, subdisk; pp = cp->provider; buf = malloc(4 * pp->sectorsize, M_MD_PROMISE, M_WAITOK | M_ZERO); for (subdisk = 0; subdisk < PROMISE_MAX_SUBDISKS; subdisk++) { error = g_write_data(cp, pp->mediasize - pp->sectorsize * (63 - subdisk * PROMISE_META_OFFSET), buf, 4 * pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", pp->name, error); } } free(buf, M_MD_PROMISE); return (error); } static int promise_meta_write_spare(struct g_consumer *cp) { struct promise_raid_conf *meta; off_t tmp; int error; meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO); memcpy(&meta->promise_id[0], PROMISE_MAGIC, sizeof(PROMISE_MAGIC) - 1); meta->dummy_0 = 0x00020000; meta->integrity = PROMISE_I_VALID; meta->disk.flags = PROMISE_F_SPARE | PROMISE_F_ONLINE | PROMISE_F_VALID; meta->disk.number = 0xff; arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0); tmp = cp->provider->mediasize / cp->provider->sectorsize - 131072; meta->disk_sectors_high = tmp >> 32; meta->disk_sectors = (uint32_t)tmp; meta->disk_rebuild_high = UINT32_MAX; meta->disk_rebuild = UINT32_MAX; error = promise_meta_write(cp, &meta, 1); free(meta, M_MD_PROMISE); return (error); } static struct g_raid_volume * g_raid_md_promise_get_volume(struct g_raid_softc *sc, uint64_t id) { struct g_raid_volume *vol; struct g_raid_md_promise_pervolume *pv; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { pv = vol->v_md_data; if (pv->pv_id == id) break; } return (vol); } static int g_raid_md_promise_purge_volumes(struct g_raid_softc *sc) { struct g_raid_volume *vol, *tvol; struct g_raid_md_promise_pervolume *pv; int i, res; res = 0; TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) { pv = vol->v_md_data; if (!pv->pv_started || vol->v_stopping) continue; for (i = 0; i < vol->v_disks_count; i++) { if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE) break; } if (i >= vol->v_disks_count) { g_raid_destroy_volume(vol); res = 1; } } return (res); } static int g_raid_md_promise_purge_disks(struct g_raid_softc *sc) { struct g_raid_disk *disk, *tdisk; struct g_raid_volume *vol; struct g_raid_md_promise_perdisk *pd; int i, j, res; res = 0; TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { if (disk->d_state == G_RAID_DISK_S_SPARE) continue; pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; /* Scan for deleted volumes. */ for (i = 0; i < pd->pd_subdisks; ) { vol = g_raid_md_promise_get_volume(sc, pd->pd_meta[i]->volume_id); if (vol != NULL && !vol->v_stopping) { i++; continue; } free(pd->pd_meta[i], M_MD_PROMISE); for (j = i; j < pd->pd_subdisks - 1; j++) pd->pd_meta[j] = pd->pd_meta[j + 1]; pd->pd_meta[pd->pd_subdisks - 1] = NULL; pd->pd_subdisks--; pd->pd_updated = 1; } /* If there is no metadata left - erase and delete disk. 
*/ if (pd->pd_subdisks == 0) { promise_meta_erase(disk->d_consumer); g_raid_destroy_disk(disk); res = 1; } } return (res); } static int g_raid_md_promise_supported(int level, int qual, int disks, int force) { if (disks > PROMISE_MAX_DISKS) return (0); switch (level) { case G_RAID_VOLUME_RL_RAID0: if (disks < 1) return (0); if (!force && disks < 2) return (0); break; case G_RAID_VOLUME_RL_RAID1: if (disks < 1) return (0); if (!force && (disks != 2)) return (0); break; case G_RAID_VOLUME_RL_RAID1E: if (disks < 2) return (0); if (disks % 2 != 0) return (0); if (!force && (disks != 4)) return (0); break; case G_RAID_VOLUME_RL_SINGLE: if (disks != 1) return (0); break; case G_RAID_VOLUME_RL_CONCAT: if (disks < 2) return (0); break; case G_RAID_VOLUME_RL_RAID5: if (disks < 3) return (0); if (qual != G_RAID_VOLUME_RLQ_R5LA) return (0); break; default: return (0); } if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE) return (0); return (1); } static int g_raid_md_promise_start_disk(struct g_raid_disk *disk, int sdn, struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; struct g_raid_md_promise_perdisk *pd; struct g_raid_md_promise_pervolume *pv; struct promise_raid_conf *meta; off_t eoff, esize, size; int disk_pos, md_disk_pos, i, resurrection = 0; sc = disk->d_softc; pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; pv = vol->v_md_data; meta = pv->pv_meta; if (sdn >= 0) { /* Find disk position in metadata by its serial. */ md_disk_pos = promise_meta_find_disk(meta, pd->pd_meta[sdn]->disk.id); /* For RAID0+1 we need to translate order. */ disk_pos = promise_meta_translate_disk(vol, md_disk_pos); } else { md_disk_pos = -1; disk_pos = -1; } if (disk_pos < 0) { G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s", g_raid_get_diskname(disk), vol->v_name); /* Failed stale disk is useless for us. */ if (sdn >= 0 && pd->pd_meta[sdn]->disk.flags & PROMISE_F_DOWN) { g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED); return (0); } /* If we were given specific metadata subdisk - erase it. */ if (sdn >= 0) { free(pd->pd_meta[sdn], M_MD_PROMISE); for (i = sdn; i < pd->pd_subdisks - 1; i++) pd->pd_meta[i] = pd->pd_meta[i + 1]; pd->pd_meta[pd->pd_subdisks - 1] = NULL; pd->pd_subdisks--; } /* If we are in the start process, that's all for now. */ if (!pv->pv_started) goto nofit; /* * If we have already started - try to get use of the disk. * Try to replace OFFLINE disks first, then FAILED. */ promise_meta_unused_range(pd->pd_meta, pd->pd_subdisks, disk->d_consumer->provider->mediasize / disk->d_consumer->provider->sectorsize, &eoff, &esize); if (esize == 0) { G_RAID_DEBUG1(1, sc, "No free space on disk %s", g_raid_get_diskname(disk)); goto nofit; } size = INT64_MAX; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state != G_RAID_SUBDISK_S_NONE) size = sd->sd_size; if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED && (disk_pos < 0 || vol->v_subdisks[i].sd_state < sd->sd_state)) disk_pos = i; } if (disk_pos >= 0 && vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT && (off_t)esize * 512 < size) { G_RAID_DEBUG1(1, sc, "Disk %s free space " "is too small (%ju < %ju)", g_raid_get_diskname(disk), (off_t)esize * 512, size); disk_pos = -1; } if (disk_pos >= 0) { if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT) esize = size / 512; /* For RAID0+1 we need to translate order. 
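 *
 * promise_meta_translate_disk() converts between the metadata order, which
 * lists the disks of one striped half of the RAID0+1 set before the other,
 * and the volume's subdisk order.  Worked example for a four-disk RAID1E
 * (width = 2): metadata positions 0,1,2,3 map to volume positions 0,2,1,3.
 * Because the mapping
 *
 *	disk_pos = (md_disk_pos / width) + (md_disk_pos % width) * width;
 *
 * is its own inverse, the same helper serves both directions: here, earlier
 * in this function, and again in g_raid_md_write_promise().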
*/ md_disk_pos = promise_meta_translate_disk(vol, disk_pos); } else { nofit: if (pd->pd_subdisks == 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); } return (0); } G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s", g_raid_get_diskname(disk), disk_pos, vol->v_name); resurrection = 1; } sd = &vol->v_subdisks[disk_pos]; if (resurrection && sd->sd_disk != NULL) { g_raid_change_disk_state(sd->sd_disk, G_RAID_DISK_S_STALE_FAILED); TAILQ_REMOVE(&sd->sd_disk->d_subdisks, sd, sd_next); } vol->v_subdisks[disk_pos].sd_disk = disk; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); /* Welcome the new disk. */ if (resurrection) g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); else g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); if (resurrection) { sd->sd_offset = (off_t)eoff * 512; sd->sd_size = (off_t)esize * 512; } else { sd->sd_offset = (((off_t)pd->pd_meta[sdn]->disk_offset_high << 32) + pd->pd_meta[sdn]->disk_offset) * 512; sd->sd_size = (((off_t)pd->pd_meta[sdn]->disk_sectors_high << 32) + pd->pd_meta[sdn]->disk_sectors) * 512; } if (resurrection) { /* Stale disk, almost same as new. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) { /* Failed disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); } else if (meta->disks[md_disk_pos].flags & PROMISE_F_REDIR) { /* Rebuilding disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_REBUILD); if (pd->pd_meta[sdn]->generation != meta->generation) sd->sd_rebuild_pos = 0; else { sd->sd_rebuild_pos = (((off_t)pd->pd_meta[sdn]->disk_rebuild_high << 32) + pd->pd_meta[sdn]->disk_rebuild) * 512; } } else if (!(meta->disks[md_disk_pos].flags & PROMISE_F_ONLINE)) { /* Rebuilding disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (pd->pd_meta[sdn]->generation != meta->generation || (meta->status & PROMISE_S_MARKED)) { /* Stale disk or dirty volume (unclean shutdown). */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_STALE); } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); return (resurrection); } static void g_raid_md_promise_refill(struct g_raid_softc *sc) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_object *md; struct g_raid_md_promise_perdisk *pd; struct g_raid_md_promise_pervolume *pv; int update, updated, i, bad; md = sc->sc_md; restart: updated = 0; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { pv = vol->v_md_data; if (!pv->pv_started || vol->v_stopping) continue; /* Search for subdisk that needs replacement. */ bad = 0; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state == G_RAID_SUBDISK_S_NONE || sd->sd_state == G_RAID_SUBDISK_S_FAILED) bad = 1; } if (!bad) continue; G_RAID_DEBUG1(1, sc, "Volume %s is not complete, " "trying to refill.", vol->v_name); TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { /* Skip failed. */ if (disk->d_state < G_RAID_DISK_S_SPARE) continue; /* Skip already used by this volume. */ for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_disk == disk) break; } if (i < vol->v_disks_count) continue; /* Try to use disk if it has empty extents. 
*/ pd = disk->d_md_data; if (pd->pd_subdisks < PROMISE_MAX_SUBDISKS) { update = g_raid_md_promise_start_disk(disk, -1, vol); } else update = 0; if (update) { updated = 1; g_raid_md_write_promise(md, vol, NULL, disk); break; } } } if (updated) goto restart; } static void g_raid_md_promise_start(struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_object *md; struct g_raid_md_promise_perdisk *pd; struct g_raid_md_promise_pervolume *pv; struct promise_raid_conf *meta; u_int i; sc = vol->v_softc; md = sc->sc_md; pv = vol->v_md_data; meta = pv->pv_meta; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; if (meta->type == PROMISE_T_RAID0) vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; else if (meta->type == PROMISE_T_RAID1) { if (meta->array_width == 1) vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; else vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; } else if (meta->type == PROMISE_T_RAID3) vol->v_raid_level = G_RAID_VOLUME_RL_RAID3; else if (meta->type == PROMISE_T_RAID5) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA; } else if (meta->type == PROMISE_T_SPAN) vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT; else if (meta->type == PROMISE_T_JBOD) vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; else vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; vol->v_strip_size = 512 << meta->stripe_shift; //ZZZ vol->v_disks_count = meta->total_disks; vol->v_mediasize = (off_t)meta->total_sectors * 512; //ZZZ if (meta->total_sectors_high < 256) /* If value looks sane. */ vol->v_mediasize += ((off_t)meta->total_sectors_high << 32) * 512; //ZZZ vol->v_sectorsize = 512 * meta->sector_size; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; sd->sd_offset = (((off_t)meta->disk_offset_high << 32) + meta->disk_offset) * 512; sd->sd_size = (((off_t)meta->disk_sectors_high << 32) + meta->disk_sectors) * 512; } g_raid_start_volume(vol); /* Make all disks found till the moment take their places. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = disk->d_md_data; for (i = 0; i < pd->pd_subdisks; i++) { if (pd->pd_meta[i]->volume_id == meta->volume_id) g_raid_md_promise_start_disk(disk, i, vol); } } pv->pv_started = 1; callout_stop(&pv->pv_start_co); G_RAID_DEBUG1(0, sc, "Volume started."); g_raid_md_write_promise(md, vol, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_promise_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); } static void g_raid_promise_go(void *arg) { struct g_raid_volume *vol; struct g_raid_softc *sc; struct g_raid_md_promise_pervolume *pv; vol = arg; pv = vol->v_md_data; sc = vol->v_softc; if (!pv->pv_started) { G_RAID_DEBUG1(0, sc, "Force volume start due to timeout."); g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD, G_RAID_EVENT_VOLUME); } } static void g_raid_md_promise_new_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct promise_raid_conf *pdmeta; struct g_raid_md_promise_perdisk *pd; struct g_raid_md_promise_pervolume *pv; struct g_raid_volume *vol; int i; char buf[33]; sc = disk->d_softc; md = sc->sc_md; pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; if (pd->pd_subdisks == 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); g_raid_md_promise_refill(sc); return; } for (i = 0; i < pd->pd_subdisks; i++) { pdmeta = pd->pd_meta[i]; /* Look for volume with matching ID. 
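 *
 * If no volume with this 64-bit ID exists yet, one is created in the
 * STARTING state with a timeout callout behind it.  The freshness test a
 * little further down uses serial-number arithmetic so that a 16-bit
 * generation counter that has wrapped still compares correctly; the same
 * test as a standalone sketch (illustrative only, helper name made up):
 *
 *	static int
 *	promise_gen_is_newer(uint16_t candidate, uint16_t current)
 *	{
 *		return ((int16_t)(candidate - current) > 0);
 *	}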
*/ vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id); if (vol == NULL) { promise_meta_get_name(pdmeta, buf); vol = g_raid_create_volume(sc, buf, pdmeta->array_number); pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO); pv->pv_id = pdmeta->volume_id; vol->v_md_data = pv; callout_init(&pv->pv_start_co, 1); callout_reset(&pv->pv_start_co, g_raid_start_timeout * hz, g_raid_promise_go, vol); } else pv = vol->v_md_data; /* If we haven't started yet - check metadata freshness. */ if (pv->pv_meta == NULL || !pv->pv_started) { if (pv->pv_meta == NULL || ((int16_t)(pdmeta->generation - pv->pv_generation)) > 0) { G_RAID_DEBUG1(1, sc, "Newer disk"); if (pv->pv_meta != NULL) free(pv->pv_meta, M_MD_PROMISE); pv->pv_meta = promise_meta_copy(pdmeta); pv->pv_generation = pv->pv_meta->generation; pv->pv_disks_present = 1; } else if (pdmeta->generation == pv->pv_generation) { pv->pv_disks_present++; G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", pv->pv_disks_present, pv->pv_meta->total_disks); } else { G_RAID_DEBUG1(1, sc, "Older disk"); } } } for (i = 0; i < pd->pd_subdisks; i++) { pdmeta = pd->pd_meta[i]; /* Look for volume with matching ID. */ vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id); if (vol == NULL) continue; pv = vol->v_md_data; if (pv->pv_started) { if (g_raid_md_promise_start_disk(disk, i, vol)) g_raid_md_write_promise(md, vol, NULL, NULL); } else { /* If we collected all needed disks - start array. */ if (pv->pv_disks_present == pv->pv_meta->total_disks) g_raid_md_promise_start(vol); } } } static int g_raid_md_create_promise(struct g_raid_md_object *md, struct g_class *mp, struct g_geom **gp) { struct g_geom *geom; struct g_raid_softc *sc; /* Search for existing node. */ LIST_FOREACH(geom, &mp->geom, geom) { sc = geom->softc; if (sc == NULL) continue; if (sc->sc_stopping != 0) continue; if (sc->sc_md->mdo_class != md->mdo_class) continue; break; } if (geom != NULL) { *gp = geom; return (G_RAID_MD_TASTE_EXISTING); } /* Create new one if not found. */ sc = g_raid_create_node(mp, "Promise", md); if (sc == NULL) return (G_RAID_MD_TASTE_FAIL); md->mdo_softc = sc; *gp = sc->sc_geom; return (G_RAID_MD_TASTE_NEW); } static int g_raid_md_taste_promise(struct g_raid_md_object *md, struct g_class *mp, struct g_consumer *cp, struct g_geom **gp) { struct g_consumer *rcp; struct g_provider *pp; struct g_raid_softc *sc; struct g_raid_disk *disk; struct promise_raid_conf *metaarr[4]; struct g_raid_md_promise_perdisk *pd; struct g_geom *geom; int i, j, result, len, subdisks; char name[16]; uint16_t vendor; G_RAID_DEBUG(1, "Tasting Promise on %s", cp->provider->name); pp = cp->provider; /* Read metadata from device. */ g_topology_unlock(); vendor = 0xffff; len = sizeof(vendor); if (pp->geom->rank == 1) g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); subdisks = promise_meta_read(cp, metaarr); g_topology_lock(); if (subdisks == 0) { if (g_raid_aggressive_spare) { if (vendor == 0x105a || vendor == 0x1002) { G_RAID_DEBUG(1, "No Promise metadata, forcing spare."); goto search; } else { G_RAID_DEBUG(1, "Promise/ATI vendor mismatch " "0x%04x != 0x105a/0x1002", vendor); } } return (G_RAID_MD_TASTE_FAIL); } /* Metadata valid. Print it. */ for (i = 0; i < subdisks; i++) g_raid_md_promise_print(metaarr[i]); /* Purge meaningless (empty/spare) records. 
*/ for (i = 0; i < subdisks; ) { if (metaarr[i]->disk.flags & PROMISE_F_ASSIGNED) { i++; continue; } free(metaarr[i], M_MD_PROMISE); for (j = i; j < subdisks - 1; j++) metaarr[i] = metaarr[j + 1]; metaarr[subdisks - 1] = NULL; subdisks--; } search: /* Search for matching node. */ sc = NULL; LIST_FOREACH(geom, &mp->geom, geom) { sc = geom->softc; if (sc == NULL) continue; if (sc->sc_stopping != 0) continue; if (sc->sc_md->mdo_class != md->mdo_class) continue; break; } /* Found matching node. */ if (geom != NULL) { G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); result = G_RAID_MD_TASTE_EXISTING; } else { /* Not found matching node -- create one. */ result = G_RAID_MD_TASTE_NEW; snprintf(name, sizeof(name), "Promise"); sc = g_raid_create_node(mp, name, md); md->mdo_softc = sc; geom = sc->sc_geom; } /* There is no return after this point, so we close passed consumer. */ g_access(cp, -1, 0, 0); rcp = g_new_consumer(geom); rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; g_topology_unlock(); sx_xlock(&sc->sc_lock); pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); pd->pd_subdisks = subdisks; for (i = 0; i < subdisks; i++) pd->pd_meta[i] = metaarr[i]; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = rcp; rcp->private = disk; g_raid_get_disk_info(disk); g_raid_md_promise_new_disk(disk); sx_xunlock(&sc->sc_lock); g_topology_lock(); *gp = geom; return (result); } static int g_raid_md_event_promise(struct g_raid_md_object *md, struct g_raid_disk *disk, u_int event) { struct g_raid_softc *sc; sc = md->mdo_softc; if (disk == NULL) return (-1); switch (event) { case G_RAID_DISK_E_DISCONNECTED: /* Delete disk. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); g_raid_md_promise_purge_volumes(sc); /* Write updated metadata to all disks. */ g_raid_md_write_promise(md, NULL, NULL, NULL); /* Check if anything left. 
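 *
 * A node that has lost its last disk is destroyed outright; otherwise any
 * remaining SPARE or STALE disks are offered to the now-degraded volumes
 * via g_raid_md_promise_refill().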
*/ if (g_raid_ndisks(sc, -1) == 0) g_raid_destroy_node(sc, 0); else g_raid_md_promise_refill(sc); return (0); } return (-2); } static int g_raid_md_volume_event_promise(struct g_raid_md_object *md, struct g_raid_volume *vol, u_int event) { struct g_raid_md_promise_pervolume *pv; pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; switch (event) { case G_RAID_VOLUME_E_STARTMD: if (!pv->pv_started) g_raid_md_promise_start(vol); return (0); } return (-2); } static int g_raid_md_ctl_promise(struct g_raid_md_object *md, struct gctl_req *req) { struct g_raid_softc *sc; struct g_raid_volume *vol, *vol1; struct g_raid_subdisk *sd; struct g_raid_disk *disk, *disks[PROMISE_MAX_DISKS]; struct g_raid_md_promise_perdisk *pd; struct g_raid_md_promise_pervolume *pv; struct g_consumer *cp; struct g_provider *pp; char arg[16]; const char *nodename, *verb, *volname, *levelname, *diskname; char *tmp; int *nargs, *force; off_t esize, offs[PROMISE_MAX_DISKS], size, sectorsize, strip; intmax_t *sizearg, *striparg; int numdisks, i, len, level, qual; int error; sc = md->mdo_softc; verb = gctl_get_param(req, "verb", NULL); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); error = 0; if (strcmp(verb, "label") == 0) { if (*nargs < 4) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, "arg1"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } levelname = gctl_get_asciiparam(req, "arg2"); if (levelname == NULL) { gctl_error(req, "No RAID level."); return (-3); } if (strcasecmp(levelname, "RAID5") == 0) levelname = "RAID5-LA"; if (g_raid_volume_str2level(levelname, &level, &qual)) { gctl_error(req, "Unknown RAID level '%s'.", levelname); return (-4); } numdisks = *nargs - 3; force = gctl_get_paraml(req, "force", sizeof(*force)); if (!g_raid_md_promise_supported(level, qual, numdisks, force ? *force : 0)) { gctl_error(req, "Unsupported RAID level " "(0x%02x/0x%02x), or number of disks (%d).", level, qual, numdisks); return (-5); } /* Search for disks, connect them and probe. 
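 *
 * For members that are already part of the node, the usable size is the
 * largest extent not covered by existing subdisk records, as reported by
 * promise_meta_unused_range(); new members simply reserve the trailing
 * 131072 sectors for metadata.  The common subdisk size becomes the
 * minimum of the per-disk values and the sector size the maximum.
 *
 * Worked example (numbers invented for illustration): on a 1000000-sector
 * member that already carries a subdisk record covering sectors
 * [0, 400000), promise_meta_unused_range() first trims the reserved
 * 131072 trailing sectors and then reports the largest gap,
 * [400000, 868928), so a new subdisk of up to 468928 sectors fits there.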
*/ size = INT64_MAX; sectorsize = 0; bzero(disks, sizeof(disks)); bzero(offs, sizeof(offs)); for (i = 0; i < numdisks; i++) { snprintf(arg, sizeof(arg), "arg%d", i + 3); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -6; break; } if (strcmp(diskname, "NONE") == 0) continue; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && strcmp(disk->d_consumer->provider->name, diskname) == 0) break; } if (disk != NULL) { if (disk->d_state != G_RAID_DISK_S_ACTIVE) { gctl_error(req, "Disk '%s' is in a " "wrong state (%s).", diskname, g_raid_disk_state2str(disk->d_state)); error = -7; break; } pd = disk->d_md_data; if (pd->pd_subdisks >= PROMISE_MAX_SUBDISKS) { gctl_error(req, "Disk '%s' already " "used by %d volumes.", diskname, pd->pd_subdisks); error = -7; break; } pp = disk->d_consumer->provider; disks[i] = disk; promise_meta_unused_range(pd->pd_meta, pd->pd_subdisks, pp->mediasize / pp->sectorsize, &offs[i], &esize); size = MIN(size, (off_t)esize * pp->sectorsize); sectorsize = MAX(sectorsize, pp->sectorsize); continue; } g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -8; break; } pp = cp->provider; pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = cp; disks[i] = disk; cp->private = disk; g_topology_unlock(); g_raid_get_disk_info(disk); /* Reserve some space for metadata. */ size = MIN(size, pp->mediasize - 131072llu * pp->sectorsize); sectorsize = MAX(sectorsize, pp->sectorsize); } if (error != 0) { for (i = 0; i < numdisks; i++) { if (disks[i] != NULL && disks[i]->d_state == G_RAID_DISK_S_NONE) g_raid_destroy_disk(disks[i]); } return (error); } if (sectorsize <= 0) { gctl_error(req, "Can't get sector size."); return (-8); } /* Handle size argument. */ len = sizeof(*sizearg); sizearg = gctl_get_param(req, "size", &len); if (sizearg != NULL && len == sizeof(*sizearg) && *sizearg > 0) { if (*sizearg > size) { gctl_error(req, "Size too big %lld > %lld.", (long long)*sizearg, (long long)size); return (-9); } size = *sizearg; } /* Handle strip argument. */ strip = 131072; len = sizeof(*striparg); striparg = gctl_get_param(req, "strip", &len); if (striparg != NULL && len == sizeof(*striparg) && *striparg > 0) { if (*striparg < sectorsize) { gctl_error(req, "Strip size too small."); return (-10); } if (*striparg % sectorsize != 0) { gctl_error(req, "Incorrect strip size."); return (-11); } strip = *striparg; } /* Round size down to strip or sector. */ if (level == G_RAID_VOLUME_RL_RAID1 || level == G_RAID_VOLUME_RL_SINGLE || level == G_RAID_VOLUME_RL_CONCAT) size -= (size % sectorsize); else if (level == G_RAID_VOLUME_RL_RAID1E && (numdisks & 1) != 0) size -= (size % (2 * strip)); else size -= (size % strip); if (size <= 0) { gctl_error(req, "Size too small."); return (-13); } /* We have all we need, create things: volume, ... 
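 *
 * The exported volume size computed below follows directly from the
 * per-subdisk size chosen above.  The rule, as a standalone sketch
 * (helper name invented for illustration):
 *
 *	static off_t
 *	volume_media_size(int level, off_t sdsize, int ndisks, off_t strip)
 *	{
 *		switch (level) {
 *		case G_RAID_VOLUME_RL_RAID0:
 *		case G_RAID_VOLUME_RL_CONCAT:
 *		case G_RAID_VOLUME_RL_SINGLE:
 *			return (sdsize * ndisks);
 *		case G_RAID_VOLUME_RL_RAID1:
 *			return (sdsize);
 *		case G_RAID_VOLUME_RL_RAID3:
 *		case G_RAID_VOLUME_RL_RAID5:
 *			return (sdsize * (ndisks - 1));
 *		default:	// RAID1E: half the raw space, whole strips
 *			return (sdsize * ndisks / strip / 2 * strip);
 *		}
 *	}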
*/ pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO); arc4rand(&pv->pv_id, sizeof(pv->pv_id), 0); pv->pv_generation = 0; pv->pv_started = 1; vol = g_raid_create_volume(sc, volname, -1); vol->v_md_data = pv; vol->v_raid_level = level; vol->v_raid_level_qualifier = qual; vol->v_strip_size = strip; vol->v_disks_count = numdisks; if (level == G_RAID_VOLUME_RL_RAID0 || level == G_RAID_VOLUME_RL_CONCAT || level == G_RAID_VOLUME_RL_SINGLE) vol->v_mediasize = size * numdisks; else if (level == G_RAID_VOLUME_RL_RAID1) vol->v_mediasize = size; else if (level == G_RAID_VOLUME_RL_RAID3 || level == G_RAID_VOLUME_RL_RAID5) vol->v_mediasize = size * (numdisks - 1); else { /* RAID1E */ vol->v_mediasize = ((size * numdisks) / strip / 2) * strip; } vol->v_sectorsize = sectorsize; g_raid_start_volume(vol); /* , and subdisks. */ for (i = 0; i < numdisks; i++) { disk = disks[i]; sd = &vol->v_subdisks[i]; sd->sd_disk = disk; sd->sd_offset = (off_t)offs[i] * 512; sd->sd_size = size; if (disk == NULL) continue; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } /* Write metadata based on created entities. */ G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_promise(md, vol, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_promise_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); return (0); } if (strcmp(verb, "add") == 0) { gctl_error(req, "`add` command is not applicable, " "use `label` instead."); return (-99); } if (strcmp(verb, "delete") == 0) { nodename = gctl_get_asciiparam(req, "arg0"); if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0) nodename = NULL; /* Full node destruction. */ if (*nargs == 1 && nodename != NULL) { /* Check if some volume is still open. */ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && g_raid_nopens(sc) != 0) { gctl_error(req, "Some volume is still open."); return (-4); } TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) promise_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); return (0); } /* Destroy specified volume. If it was last - all node. */ if (*nargs > 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, nodename != NULL ? "arg1" : "arg0"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } /* Search for volume. */ TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (strcmp(vol->v_name, volname) == 0) break; pp = vol->v_provider; if (pp == NULL) continue; if (strcmp(pp->name, volname) == 0) break; if (strncmp(pp->name, "raid/", 5) == 0 && strcmp(pp->name + 5, volname) == 0) break; } if (vol == NULL) { i = strtol(volname, &tmp, 10); if (verb != volname && tmp[0] == 0) { TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (vol->v_global_id == i) break; } } } if (vol == NULL) { gctl_error(req, "Volume '%s' not found.", volname); return (-3); } /* Check if volume is still open. */ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && vol->v_provider_open != 0) { gctl_error(req, "Volume is still open."); return (-4); } /* Destroy volume and potentially node. 
*/ i = 0; TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next) i++; if (i >= 2) { g_raid_destroy_volume(vol); g_raid_md_promise_purge_disks(sc); g_raid_md_write_promise(md, NULL, NULL, NULL); } else { TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) promise_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); } return (0); } if (strcmp(verb, "remove") == 0 || strcmp(verb, "fail") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } for (i = 1; i < *nargs; i++) { snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -2; break; } if (strncmp(diskname, "/dev/", 5) == 0) diskname += 5; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && strcmp(disk->d_consumer->provider->name, diskname) == 0) break; } if (disk == NULL) { gctl_error(req, "Disk '%s' not found.", diskname); error = -3; break; } if (strcmp(verb, "fail") == 0) { g_raid_md_fail_disk_promise(md, NULL, disk); continue; } /* Erase metadata on deleting disk and destroy it. */ promise_meta_erase(disk->d_consumer); g_raid_destroy_disk(disk); } g_raid_md_promise_purge_volumes(sc); /* Write updated metadata to remaining disks. */ g_raid_md_write_promise(md, NULL, NULL, NULL); /* Check if anything left. */ if (g_raid_ndisks(sc, -1) == 0) g_raid_destroy_node(sc, 0); else g_raid_md_promise_refill(sc); return (error); } if (strcmp(verb, "insert") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } for (i = 1; i < *nargs; i++) { /* Get disk name. */ snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -3; break; } /* Try to find provider with specified name. */ g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -4; break; } pp = cp->provider; g_topology_unlock(); pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); disk = g_raid_create_disk(sc); disk->d_consumer = cp; disk->d_md_data = (void *)pd; cp->private = disk; g_raid_get_disk_info(disk); /* Welcome the "new" disk. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); promise_meta_write_spare(cp); g_raid_md_promise_refill(sc); } return (error); } return (-100); } static int g_raid_md_write_promise(struct g_raid_md_object *md, struct g_raid_volume *tvol, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_promise_perdisk *pd; struct g_raid_md_promise_pervolume *pv; struct promise_raid_conf *meta; off_t rebuild_lba64; int i, j, pos, rebuild; sc = md->mdo_softc; if (sc->sc_stopping == G_RAID_DESTROY_HARD) return (0); /* Generate new per-volume metadata for affected volumes. */ TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (vol->v_stopping) continue; /* Skip volumes not related to specified targets. 
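 *
 * Only volumes that reference the given volume, subdisk or disk get a
 * bumped generation and freshly built metadata below; all other volumes
 * keep their current on-disk copies untouched.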
*/ if (tvol != NULL && vol != tvol) continue; if (tsd != NULL && vol != tsd->sd_volume) continue; if (tdisk != NULL) { for (i = 0; i < vol->v_disks_count; i++) { if (vol->v_subdisks[i].sd_disk == tdisk) break; } if (i >= vol->v_disks_count) continue; } pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; pv->pv_generation++; meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO); if (pv->pv_meta != NULL) memcpy(meta, pv->pv_meta, sizeof(*meta)); memcpy(meta->promise_id, PROMISE_MAGIC, sizeof(PROMISE_MAGIC) - 1); meta->dummy_0 = 0x00020000; meta->integrity = PROMISE_I_VALID; meta->generation = pv->pv_generation; meta->status = PROMISE_S_VALID | PROMISE_S_ONLINE | PROMISE_S_INITED | PROMISE_S_READY; if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED) meta->status |= PROMISE_S_DEGRADED; if (vol->v_dirty) meta->status |= PROMISE_S_MARKED; /* XXX: INVENTED! */ if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0 || vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE) meta->type = PROMISE_T_RAID0; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) meta->type = PROMISE_T_RAID1; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3) meta->type = PROMISE_T_RAID3; else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) meta->type = PROMISE_T_RAID5; else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT) meta->type = PROMISE_T_SPAN; else meta->type = PROMISE_T_JBOD; meta->total_disks = vol->v_disks_count; meta->stripe_shift = ffs(vol->v_strip_size / 1024); meta->array_width = vol->v_disks_count; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) meta->array_width /= 2; meta->array_number = vol->v_global_id; meta->total_sectors = vol->v_mediasize / 512; meta->total_sectors_high = (vol->v_mediasize / 512) >> 32; meta->sector_size = vol->v_sectorsize / 512; meta->cylinders = meta->total_sectors / (255 * 63) - 1; meta->heads = 254; meta->sectors = 63; meta->volume_id = pv->pv_id; rebuild_lba64 = UINT64_MAX; rebuild = 0; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; /* For RAID0+1 we need to translate order. */ pos = promise_meta_translate_disk(vol, i); meta->disks[pos].flags = PROMISE_F_VALID | PROMISE_F_ASSIGNED; if (sd->sd_state == G_RAID_SUBDISK_S_NONE) { meta->disks[pos].flags |= 0; } else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) { meta->disks[pos].flags |= PROMISE_F_DOWN | PROMISE_F_REDIR; } else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD) { meta->disks[pos].flags |= PROMISE_F_ONLINE | PROMISE_F_REDIR; if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) { rebuild_lba64 = MIN(rebuild_lba64, sd->sd_rebuild_pos / 512); } else rebuild_lba64 = 0; rebuild = 1; } else { meta->disks[pos].flags |= PROMISE_F_ONLINE; if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE) { meta->status |= PROMISE_S_MARKED; if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { rebuild_lba64 = MIN(rebuild_lba64, sd->sd_rebuild_pos / 512); } else rebuild_lba64 = 0; } } if (pv->pv_meta != NULL) { meta->disks[pos].id = pv->pv_meta->disks[pos].id; } else { meta->disks[pos].number = i * 2; arc4rand(&meta->disks[pos].id, sizeof(meta->disks[pos].id), 0); } } promise_meta_put_name(meta, vol->v_name); /* Try to mimic AMD BIOS rebuild/resync behavior. */ if (rebuild_lba64 != UINT64_MAX) { if (rebuild) meta->magic_3 = 0x03040010UL; /* Rebuild? */ else meta->magic_3 = 0x03040008UL; /* Resync? */ /* Translate from per-disk to per-volume LBA. 
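 *
 * GEOM tracks the rebuild position as an offset within one subdisk, while
 * the BIOS expects an LBA within the whole volume, so the value is scaled
 * by the number of data columns.  Example: on a three-disk RAID5
 * (array_width 3, two data columns per stripe) a per-disk position of
 * 1000 sectors becomes volume LBA 2000.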
*/ if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { rebuild_lba64 *= meta->array_width; } else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 || vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) { rebuild_lba64 *= meta->array_width - 1; } else rebuild_lba64 = 0; } else meta->magic_3 = 0x03000000UL; meta->rebuild_lba64 = rebuild_lba64; meta->magic_4 = 0x04010101UL; /* Replace per-volume metadata with new. */ if (pv->pv_meta != NULL) free(pv->pv_meta, M_MD_PROMISE); pv->pv_meta = meta; /* Copy new metadata to the disks, adding or replacing old. */ for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; disk = sd->sd_disk; if (disk == NULL) continue; /* For RAID0+1 we need to translate order. */ pos = promise_meta_translate_disk(vol, i); pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; for (j = 0; j < pd->pd_subdisks; j++) { if (pd->pd_meta[j]->volume_id == meta->volume_id) break; } if (j == pd->pd_subdisks) pd->pd_subdisks++; if (pd->pd_meta[j] != NULL) free(pd->pd_meta[j], M_MD_PROMISE); pd->pd_meta[j] = promise_meta_copy(meta); pd->pd_meta[j]->disk = meta->disks[pos]; pd->pd_meta[j]->disk.number = pos; pd->pd_meta[j]->disk_offset_high = (sd->sd_offset / 512) >> 32; pd->pd_meta[j]->disk_offset = sd->sd_offset / 512; pd->pd_meta[j]->disk_sectors_high = (sd->sd_size / 512) >> 32; pd->pd_meta[j]->disk_sectors = sd->sd_size / 512; if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) { pd->pd_meta[j]->disk_rebuild_high = (sd->sd_rebuild_pos / 512) >> 32; pd->pd_meta[j]->disk_rebuild = sd->sd_rebuild_pos / 512; } else if (sd->sd_state < G_RAID_SUBDISK_S_REBUILD) { pd->pd_meta[j]->disk_rebuild_high = 0; pd->pd_meta[j]->disk_rebuild = 0; } else { pd->pd_meta[j]->disk_rebuild_high = UINT32_MAX; pd->pd_meta[j]->disk_rebuild = UINT32_MAX; } pd->pd_updated = 1; } } TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; if (disk->d_state != G_RAID_DISK_S_ACTIVE) continue; if (!pd->pd_updated) continue; G_RAID_DEBUG(1, "Writing Promise metadata to %s", g_raid_get_diskname(disk)); for (i = 0; i < pd->pd_subdisks; i++) g_raid_md_promise_print(pd->pd_meta[i]); promise_meta_write(disk->d_consumer, pd->pd_meta, pd->pd_subdisks); pd->pd_updated = 0; } return (0); } static int g_raid_md_fail_disk_promise(struct g_raid_md_object *md, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_md_promise_perdisk *pd; struct g_raid_subdisk *sd; int i, pos; sc = md->mdo_softc; pd = (struct g_raid_md_promise_perdisk *)tdisk->d_md_data; /* We can't fail disk that is not a part of array now. */ if (tdisk->d_state != G_RAID_DISK_S_ACTIVE) return (-1); /* * Mark disk as failed in metadata and try to write that metadata * to the disk itself to prevent it's later resurrection as STALE. */ if (pd->pd_subdisks > 0 && tdisk->d_consumer != NULL) G_RAID_DEBUG(1, "Writing Promise metadata to %s", g_raid_get_diskname(tdisk)); for (i = 0; i < pd->pd_subdisks; i++) { pd->pd_meta[i]->disk.flags |= PROMISE_F_DOWN | PROMISE_F_REDIR; pos = pd->pd_meta[i]->disk.number; if (pos >= 0 && pos < PROMISE_MAX_DISKS) { pd->pd_meta[i]->disks[pos].flags |= PROMISE_F_DOWN | PROMISE_F_REDIR; } g_raid_md_promise_print(pd->pd_meta[i]); } if (tdisk->d_consumer != NULL) promise_meta_write(tdisk->d_consumer, pd->pd_meta, pd->pd_subdisks); /* Change states. 
*/ g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, G_RAID_EVENT_SUBDISK); } /* Write updated metadata to remaining disks. */ g_raid_md_write_promise(md, NULL, NULL, tdisk); g_raid_md_promise_refill(sc); return (0); } static int g_raid_md_free_disk_promise(struct g_raid_md_object *md, struct g_raid_disk *disk) { struct g_raid_md_promise_perdisk *pd; int i; pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; for (i = 0; i < pd->pd_subdisks; i++) { if (pd->pd_meta[i] != NULL) { free(pd->pd_meta[i], M_MD_PROMISE); pd->pd_meta[i] = NULL; } } free(pd, M_MD_PROMISE); disk->d_md_data = NULL; return (0); } static int g_raid_md_free_volume_promise(struct g_raid_md_object *md, struct g_raid_volume *vol) { struct g_raid_md_promise_pervolume *pv; pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; if (pv && pv->pv_meta != NULL) { free(pv->pv_meta, M_MD_PROMISE); pv->pv_meta = NULL; } if (pv && !pv->pv_started) { pv->pv_started = 1; callout_stop(&pv->pv_start_co); } free(pv, M_MD_PROMISE); vol->v_md_data = NULL; return (0); } static int g_raid_md_free_promise(struct g_raid_md_object *md) { return (0); } G_RAID_MD_DECLARE(promise, "Promise"); Index: head/sys/geom/raid/md_sii.c =================================================================== --- head/sys/geom/raid/md_sii.c (revision 350693) +++ head/sys/geom/raid/md_sii.c (revision 350694) @@ -1,1673 +1,1674 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Alexander Motin * Copyright (c) 2000 - 2008 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_md_if.h" static MALLOC_DEFINE(M_MD_SII, "md_sii_data", "GEOM_RAID SiI metadata"); struct sii_raid_conf { uint16_t ata_params_00_53[54]; uint64_t total_sectors; /* 54 - 57 */ uint16_t ata_params_58_81[72]; uint16_t product_id; /* 130 */ uint16_t vendor_id; /* 131 */ uint16_t version_minor; /* 132 */ uint16_t version_major; /* 133 */ uint8_t timestamp[6]; /* 134 - 136 */ uint16_t strip_sectors; /* 137 */ uint16_t dummy_2; uint8_t disk_number; /* 139 */ uint8_t type; #define SII_T_RAID0 0x00 #define SII_T_RAID1 0x01 #define SII_T_RAID01 0x02 #define SII_T_SPARE 0x03 #define SII_T_CONCAT 0x04 #define SII_T_RAID5 0x10 #define SII_T_RESERVED 0xfd #define SII_T_JBOD 0xff uint8_t raid0_disks; /* 140 */ uint8_t raid0_ident; uint8_t raid1_disks; /* 141 */ uint8_t raid1_ident; uint64_t rebuild_lba; /* 142 - 145 */ uint32_t generation; /* 146 - 147 */ uint8_t disk_status; /* 148 */ #define SII_S_CURRENT 0x01 #define SII_S_REBUILD 0x02 #define SII_S_DROPPED 0x03 #define SII_S_REMOVED 0x04 uint8_t raid_status; #define SII_S_ONLINE 0x01 #define SII_S_AVAILABLE 0x02 uint8_t raid_location; /* 149 */ uint8_t disk_location; uint8_t auto_rebuild; /* 150 */ #define SII_R_REBUILD 0x00 #define SII_R_NOREBUILD 0xff uint8_t dummy_3; uint8_t name[16]; /* 151 - 158 */ uint16_t checksum; /* 159 */ uint16_t ata_params_160_255[96]; } __packed; struct g_raid_md_sii_perdisk { struct sii_raid_conf *pd_meta; int pd_disk_pos; off_t pd_disk_size; }; struct g_raid_md_sii_object { struct g_raid_md_object mdio_base; uint8_t mdio_timestamp[6]; uint8_t mdio_location; uint32_t mdio_generation; struct sii_raid_conf *mdio_meta; struct callout mdio_start_co; /* STARTING state timer. */ int mdio_total_disks; int mdio_disks_present; int mdio_started; int mdio_incomplete; struct root_hold_token *mdio_rootmount; /* Root mount delay token. 
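As a sanity check on the structure declared above, the listed field widths add up to exactly one 512-byte sector, with the checksum ending at 16-bit word 159 just as the offset comments claim. A throwaway arithmetic check, not driver code:

#include <assert.h>
#include <stdio.h>

int
main(void)
{
    unsigned bytes_to_checksum =
        54 * 2 +            /* ata_params_00_53 */
        8 +                 /* total_sectors */
        72 * 2 +            /* ata_params_58_81 (words 58..129) */
        2 + 2 + 2 + 2 +     /* product_id .. version_major */
        6 +                 /* timestamp */
        2 + 2 +             /* strip_sectors, dummy_2 */
        1 + 1 +             /* disk_number, type */
        1 + 1 + 1 + 1 +     /* raid0/raid1 disks and idents */
        8 +                 /* rebuild_lba */
        4 +                 /* generation */
        6 +                 /* disk_status .. dummy_3 */
        16 +                /* name */
        2;                  /* checksum */

    assert(bytes_to_checksum == 320);               /* words 0..159 */
    assert(bytes_to_checksum + 96 * 2 == 512);      /* plus ata_params_160_255 */
    printf("sii_raid_conf spans %u bytes\n", bytes_to_checksum + 96 * 2);
    return (0);
}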
*/ }; static g_raid_md_create_t g_raid_md_create_sii; static g_raid_md_taste_t g_raid_md_taste_sii; static g_raid_md_event_t g_raid_md_event_sii; static g_raid_md_ctl_t g_raid_md_ctl_sii; static g_raid_md_write_t g_raid_md_write_sii; static g_raid_md_fail_disk_t g_raid_md_fail_disk_sii; static g_raid_md_free_disk_t g_raid_md_free_disk_sii; static g_raid_md_free_t g_raid_md_free_sii; static kobj_method_t g_raid_md_sii_methods[] = { KOBJMETHOD(g_raid_md_create, g_raid_md_create_sii), KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_sii), KOBJMETHOD(g_raid_md_event, g_raid_md_event_sii), KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_sii), KOBJMETHOD(g_raid_md_write, g_raid_md_write_sii), KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_sii), KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_sii), KOBJMETHOD(g_raid_md_free, g_raid_md_free_sii), { 0, 0 } }; static struct g_raid_md_class g_raid_md_sii_class = { "SiI", g_raid_md_sii_methods, sizeof(struct g_raid_md_sii_object), .mdc_enable = 1, .mdc_priority = 100 }; static void g_raid_md_sii_print(struct sii_raid_conf *meta) { if (g_raid_debug < 1) return; printf("********* ATA SiI RAID Metadata *********\n"); printf("total_sectors %llu\n", (long long unsigned)meta->total_sectors); printf("product_id 0x%04x\n", meta->product_id); printf("vendor_id 0x%04x\n", meta->vendor_id); printf("version_minor 0x%04x\n", meta->version_minor); printf("version_major 0x%04x\n", meta->version_major); printf("timestamp 0x%02x%02x%02x%02x%02x%02x\n", meta->timestamp[5], meta->timestamp[4], meta->timestamp[3], meta->timestamp[2], meta->timestamp[1], meta->timestamp[0]); printf("strip_sectors %d\n", meta->strip_sectors); printf("disk_number %d\n", meta->disk_number); printf("type 0x%02x\n", meta->type); printf("raid0_disks %d\n", meta->raid0_disks); printf("raid0_ident %d\n", meta->raid0_ident); printf("raid1_disks %d\n", meta->raid1_disks); printf("raid1_ident %d\n", meta->raid1_ident); printf("rebuild_lba %llu\n", (long long unsigned)meta->rebuild_lba); printf("generation %d\n", meta->generation); printf("disk_status %d\n", meta->disk_status); printf("raid_status %d\n", meta->raid_status); printf("raid_location %d\n", meta->raid_location); printf("disk_location %d\n", meta->disk_location); printf("auto_rebuild %d\n", meta->auto_rebuild); printf("name <%.16s>\n", meta->name); printf("checksum 0x%04x\n", meta->checksum); printf("=================================================\n"); } static struct sii_raid_conf * sii_meta_copy(struct sii_raid_conf *meta) { struct sii_raid_conf *nmeta; nmeta = malloc(sizeof(*meta), M_MD_SII, M_WAITOK); memcpy(nmeta, meta, sizeof(*meta)); return (nmeta); } static int sii_meta_total_disks(struct sii_raid_conf *meta) { switch (meta->type) { case SII_T_RAID0: case SII_T_RAID5: case SII_T_CONCAT: return (meta->raid0_disks); case SII_T_RAID1: return (meta->raid1_disks); case SII_T_RAID01: return (meta->raid0_disks * meta->raid1_disks); case SII_T_SPARE: case SII_T_JBOD: return (1); } return (0); } static int sii_meta_disk_pos(struct sii_raid_conf *meta, struct sii_raid_conf *pdmeta) { if (pdmeta->type == SII_T_SPARE) return (-3); if (memcmp(&meta->timestamp, &pdmeta->timestamp, 6) != 0) return (-1); switch (pdmeta->type) { case SII_T_RAID0: case SII_T_RAID1: case SII_T_RAID5: case SII_T_CONCAT: return (pdmeta->disk_number); case SII_T_RAID01: return (pdmeta->raid1_ident * pdmeta->raid1_disks + pdmeta->raid0_ident); case SII_T_JBOD: return (0); } return (-1); } static void sii_meta_get_name(struct sii_raid_conf *meta, char *buf) { int i; 
strncpy(buf, meta->name, 16); buf[16] = 0; for (i = 15; i >= 0; i--) { if (buf[i] > 0x20) break; buf[i] = 0; } } static void sii_meta_put_name(struct sii_raid_conf *meta, char *buf) { memset(meta->name, 0x20, 16); memcpy(meta->name, buf, MIN(strlen(buf), 16)); } static struct sii_raid_conf * sii_meta_read(struct g_consumer *cp) { struct g_provider *pp; struct sii_raid_conf *meta; char *buf; int error, i; uint16_t checksum, *ptr; pp = cp->provider; /* Read the anchor sector. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) { G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", pp->name, error); return (NULL); } meta = (struct sii_raid_conf *)buf; /* Check vendor ID. */ if (meta->vendor_id != 0x1095) { G_RAID_DEBUG(1, "SiI vendor ID check failed on %s (0x%04x)", pp->name, meta->vendor_id); g_free(buf); return (NULL); } /* Check metadata major version. */ if (meta->version_major != 2) { G_RAID_DEBUG(1, "SiI version check failed on %s (%d.%d)", pp->name, meta->version_major, meta->version_minor); g_free(buf); return (NULL); } meta = malloc(sizeof(*meta), M_MD_SII, M_WAITOK); memcpy(meta, buf, min(sizeof(*meta), pp->sectorsize)); g_free(buf); /* Check metadata checksum. */ for (checksum = 0, ptr = (uint16_t *)meta, i = 0; i <= 159; i++) checksum += *ptr++; if (checksum != 0) { G_RAID_DEBUG(1, "SiI checksum check failed on %s", pp->name); free(meta, M_MD_SII); return (NULL); } /* Check raid type. */ if (meta->type != SII_T_RAID0 && meta->type != SII_T_RAID1 && meta->type != SII_T_RAID01 && meta->type != SII_T_SPARE && meta->type != SII_T_RAID5 && meta->type != SII_T_CONCAT && meta->type != SII_T_JBOD) { G_RAID_DEBUG(1, "SiI unknown RAID level on %s (0x%02x)", pp->name, meta->type); free(meta, M_MD_SII); return (NULL); } return (meta); } static int sii_meta_write(struct g_consumer *cp, struct sii_raid_conf *meta) { struct g_provider *pp; char *buf; int error, i; uint16_t checksum, *ptr; pp = cp->provider; /* Recalculate checksum for case if metadata were changed. */ meta->checksum = 0; for (checksum = 0, ptr = (uint16_t *)meta, i = 0; i < 159; i++) checksum += *ptr++; meta->checksum -= checksum; /* Create and fill buffer. */ buf = malloc(pp->sectorsize, M_MD_SII, M_WAITOK | M_ZERO); memcpy(buf, meta, sizeof(*meta)); /* Write 4 copies of metadata. */ for (i = 0; i < 4; i++) { error = g_write_data(cp, pp->mediasize - (pp->sectorsize * (1 + 0x200 * i)), buf, pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", pp->name, error); break; } } free(buf, M_MD_SII); return (error); } static int sii_meta_erase(struct g_consumer *cp) { struct g_provider *pp; char *buf; int error, i; pp = cp->provider; buf = malloc(pp->sectorsize, M_MD_SII, M_WAITOK | M_ZERO); /* Write 4 copies of metadata. 
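The read path above requires the 16-bit sum of the first 160 words of the sector, checksum included, to be zero, and the write path rebuilds the checksum by summing the first 159 words and storing the negated result. A standalone model of that scheme; it works on a plain word array and ignores the endianness handling a real on-disk reader would need.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Pick word 159 so that the 16-bit sum of all 160 words is zero. */
static void
sii_checksum_set(uint16_t words[160])
{
    uint16_t sum = 0;
    int i;

    words[159] = 0;
    for (i = 0; i < 159; i++)
        sum += words[i];
    words[159] = (uint16_t)(0 - sum);   /* two's-complement fixup */
}

/* Valid metadata sums to zero over all 160 words, checksum included. */
static int
sii_checksum_ok(const uint16_t words[160])
{
    uint16_t sum = 0;
    int i;

    for (i = 0; i <= 159; i++)
        sum += words[i];
    return (sum == 0);
}

int
main(void)
{
    uint16_t sector[160];

    memset(sector, 0xa5, sizeof(sector));
    sii_checksum_set(sector);
    printf("valid: %d\n", sii_checksum_ok(sector));
    return (0);
}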
*/ for (i = 0; i < 4; i++) { error = g_write_data(cp, pp->mediasize - (pp->sectorsize * (1 + 0x200 * i)), buf, pp->sectorsize); if (error != 0) { G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", pp->name, error); } } free(buf, M_MD_SII); return (error); } static int sii_meta_write_spare(struct g_consumer *cp) { struct sii_raid_conf *meta; int error; meta = malloc(sizeof(*meta), M_MD_SII, M_WAITOK | M_ZERO); meta->total_sectors = cp->provider->mediasize / cp->provider->sectorsize - 0x800; meta->vendor_id = 0x1095; meta->version_minor = 0; meta->version_major = 2; meta->timestamp[0] = arc4random(); meta->timestamp[1] = arc4random(); meta->timestamp[2] = arc4random(); meta->timestamp[3] = arc4random(); meta->timestamp[4] = arc4random(); meta->timestamp[5] = arc4random(); meta->type = SII_T_SPARE; meta->generation = 1; meta->raid1_ident = 0xff; meta->raid_location = arc4random(); error = sii_meta_write(cp, meta); free(meta, M_MD_SII); return (error); } static struct g_raid_disk * g_raid_md_sii_get_disk(struct g_raid_softc *sc, int id) { struct g_raid_disk *disk; struct g_raid_md_sii_perdisk *pd; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data; if (pd->pd_disk_pos == id) break; } return (disk); } static int g_raid_md_sii_supported(int level, int qual, int disks, int force) { if (disks > 8) return (0); switch (level) { case G_RAID_VOLUME_RL_RAID0: if (disks < 1) return (0); if (!force && (disks < 2 || disks > 6)) return (0); break; case G_RAID_VOLUME_RL_RAID1: if (disks < 1) return (0); if (!force && (disks != 2)) return (0); break; case G_RAID_VOLUME_RL_RAID1E: if (disks < 2) return (0); if (disks % 2 != 0) return (0); if (!force && (disks < 4)) return (0); break; case G_RAID_VOLUME_RL_SINGLE: if (disks != 1) return (0); break; case G_RAID_VOLUME_RL_CONCAT: if (disks < 2) return (0); break; case G_RAID_VOLUME_RL_RAID5: if (disks < 3) return (0); if (qual != G_RAID_VOLUME_RLQ_R5LS) return (0); break; default: return (0); } if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE) return (0); return (1); } static int g_raid_md_sii_start_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *tmpsd; struct g_raid_disk *olddisk, *tmpdisk; struct g_raid_md_object *md; struct g_raid_md_sii_object *mdi; struct g_raid_md_sii_perdisk *pd, *oldpd; struct sii_raid_conf *meta; int disk_pos, resurrection = 0; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_sii_object *)md; meta = mdi->mdio_meta; pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data; olddisk = NULL; /* Find disk position in metadata by its serial. */ if (pd->pd_meta != NULL) disk_pos = sii_meta_disk_pos(meta, pd->pd_meta); else disk_pos = -3; if (disk_pos < 0) { G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk"); /* If we are in the start process, that's all for now. */ if (!mdi->mdio_started) goto nofit; /* * If we have already started - try to get use of the disk. * Try to replace OFFLINE disks first, then FAILED. */ TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) { if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE && tmpdisk->d_state != G_RAID_DISK_S_FAILED) continue; /* Make sure this disk is big enough. 
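Both the write and erase loops above address four copies of the metadata sector counted back from the end of the provider, one every 0x200 sectors, which is consistent with the 0x800 trailing sectors that sii_meta_write_spare() leaves out of total_sectors. A quick sketch of those byte offsets; the helper name and the example disk size are mine.

#include <stdint.h>
#include <stdio.h>

/* Byte offset of metadata copy "copy" (0..3), counted from the provider end. */
static uint64_t
sii_meta_offset(uint64_t mediasize, uint32_t sectorsize, int copy)
{
    return (mediasize - (uint64_t)sectorsize * (1 + 0x200 * copy));
}

int
main(void)
{
    uint64_t mediasize = 2000398934016ULL;  /* ~2 TB example disk */
    uint32_t sectorsize = 512;
    int i;

    for (i = 0; i < 4; i++)
        printf("copy %d at byte offset %ju\n", i,
            (uintmax_t)sii_meta_offset(mediasize, sectorsize, i));
    return (0);
}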
*/ TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) { if (sd->sd_offset + sd->sd_size + 512 > pd->pd_disk_size) { G_RAID_DEBUG1(1, sc, "Disk too small (%ju < %ju)", pd->pd_disk_size, sd->sd_offset + sd->sd_size + 512); break; } } if (sd != NULL) continue; if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) { olddisk = tmpdisk; break; } else if (olddisk == NULL) olddisk = tmpdisk; } if (olddisk == NULL) { nofit: if (disk_pos == -3 || pd->pd_disk_pos == -3) { g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); return (1); } else { g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); return (0); } } oldpd = (struct g_raid_md_sii_perdisk *)olddisk->d_md_data; disk_pos = oldpd->pd_disk_pos; resurrection = 1; } if (olddisk == NULL) { /* Find placeholder by position. */ olddisk = g_raid_md_sii_get_disk(sc, disk_pos); if (olddisk == NULL) panic("No disk at position %d!", disk_pos); if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) { G_RAID_DEBUG1(1, sc, "More than one disk for pos %d", disk_pos); g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); return (0); } oldpd = (struct g_raid_md_sii_perdisk *)olddisk->d_md_data; } /* Replace failed disk or placeholder with new disk. */ TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) { TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next); TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); sd->sd_disk = disk; } oldpd->pd_disk_pos = -2; pd->pd_disk_pos = disk_pos; /* If it was placeholder -- destroy it. */ if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) { g_raid_destroy_disk(olddisk); } else { /* Otherwise, make it STALE_FAILED. */ g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED); } /* Welcome the new disk. */ if (resurrection) g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); else if (pd->pd_meta->disk_status == SII_S_CURRENT || pd->pd_meta->disk_status == SII_S_REBUILD) g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); else g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { /* * Different disks may have different sizes, * in concat mode. Update from real disk size. */ if (meta->type == SII_T_CONCAT || meta->type == SII_T_JBOD) sd->sd_size = pd->pd_disk_size - 0x800 * 512; if (resurrection) { /* New or ex-spare disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NEW); } else if (pd->pd_meta->disk_status == SII_S_REBUILD) { /* Rebuilding disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_REBUILD); if (pd->pd_meta->generation == meta->generation) sd->sd_rebuild_pos = pd->pd_meta->rebuild_lba * 512; else sd->sd_rebuild_pos = 0; } else if (pd->pd_meta->disk_status == SII_S_CURRENT) { if (pd->pd_meta->raid_status == SII_S_ONLINE || pd->pd_meta->generation != meta->generation) { /* Dirty or resyncing disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_STALE); } else { /* Up to date disk. */ g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } } else { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); } g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } /* Update status of our need for spare. 
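Two of the checks above reduce to simple arithmetic: a candidate replacement disk must hold every subdisk plus the trailing metadata sector, and CONCAT/JBOD subdisks take the whole disk minus the reserved 0x800 sectors (1 MiB with 512-byte sectors). The helper names below are illustrative, not the driver's.

#include <stdint.h>
#include <stdio.h>

/* Fit test used when adopting a disk into an OFFLINE/FAILED slot. */
static int
replacement_fits(uint64_t disk_size, uint64_t sd_offset, uint64_t sd_size)
{
    return (sd_offset + sd_size + 512 <= disk_size);
}

/* CONCAT/JBOD subdisk size: whole disk minus the 0x800-sector reservation. */
static uint64_t
concat_subdisk_size(uint64_t disk_size)
{
    return (disk_size - 0x800 * 512);
}

int
main(void)
{
    uint64_t disk = 500107862016ULL;    /* ~500 GB example */

    printf("fits: %d, concat subdisk: %ju\n",
        replacement_fits(disk, 0, disk - 4096),
        (uintmax_t)concat_subdisk_size(disk));
    return (0);
}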
*/ if (mdi->mdio_started) { mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < mdi->mdio_total_disks); } return (resurrection); } static void g_disk_md_sii_retaste(void *arg, int pending) { G_RAID_DEBUG(1, "Array is not complete, trying to retaste."); g_retaste(&g_raid_class); free(arg, M_MD_SII); } static void g_raid_md_sii_refill(struct g_raid_softc *sc) { struct g_raid_md_object *md; struct g_raid_md_sii_object *mdi; struct g_raid_disk *disk; struct task *task; int update, na; md = sc->sc_md; mdi = (struct g_raid_md_sii_object *)md; update = 0; do { /* Make sure we miss anything. */ na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE); if (na == mdi->mdio_total_disks) break; G_RAID_DEBUG1(1, md->mdo_softc, "Array is not complete (%d of %d), " "trying to refill.", na, mdi->mdio_total_disks); /* Try to get use some of STALE disks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_STALE) { update += g_raid_md_sii_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_ACTIVE) break; } } if (disk != NULL) continue; /* Try to get use some of SPARE disks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_RAID_DISK_S_SPARE) { update += g_raid_md_sii_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_ACTIVE) break; } } } while (disk != NULL); /* Write new metadata if we changed something. */ if (update) g_raid_md_write_sii(md, NULL, NULL, NULL); /* Update status of our need for spare. */ mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < mdi->mdio_total_disks); /* Request retaste hoping to find spare. */ if (mdi->mdio_incomplete) { task = malloc(sizeof(struct task), M_MD_SII, M_WAITOK | M_ZERO); TASK_INIT(task, 0, g_disk_md_sii_retaste, task); taskqueue_enqueue(taskqueue_swi, task); } } static void g_raid_md_sii_start(struct g_raid_softc *sc) { struct g_raid_md_object *md; struct g_raid_md_sii_object *mdi; struct g_raid_md_sii_perdisk *pd; struct sii_raid_conf *meta; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk, *best; off_t size; int j, disk_pos; uint32_t gendiff, bestgendiff; char buf[17]; md = sc->sc_md; mdi = (struct g_raid_md_sii_object *)md; meta = mdi->mdio_meta; /* Create volumes and subdisks. 
*/ sii_meta_get_name(meta, buf); vol = g_raid_create_volume(sc, buf, -1); vol->v_mediasize = (off_t)meta->total_sectors * 512; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; if (meta->type == SII_T_RAID0) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; size = vol->v_mediasize / mdi->mdio_total_disks; } else if (meta->type == SII_T_RAID1) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; size = vol->v_mediasize; } else if (meta->type == SII_T_RAID01) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; size = vol->v_mediasize / (mdi->mdio_total_disks / 2); } else if (meta->type == SII_T_CONCAT) { if (mdi->mdio_total_disks == 1) vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; else vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT; size = 0; } else if (meta->type == SII_T_RAID5) { vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LS; size = vol->v_mediasize / (mdi->mdio_total_disks - 1); } else if (meta->type == SII_T_JBOD) { vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; size = 0; } else { vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; size = 0; } vol->v_strip_size = meta->strip_sectors * 512; //ZZZ vol->v_disks_count = mdi->mdio_total_disks; vol->v_sectorsize = 512; //ZZZ for (j = 0; j < vol->v_disks_count; j++) { sd = &vol->v_subdisks[j]; sd->sd_offset = 0; sd->sd_size = size; } g_raid_start_volume(vol); /* Create disk placeholders to store data for later writing. */ for (disk_pos = 0; disk_pos < mdi->mdio_total_disks; disk_pos++) { pd = malloc(sizeof(*pd), M_MD_SII, M_WAITOK | M_ZERO); pd->pd_disk_pos = disk_pos; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_state = G_RAID_DISK_S_OFFLINE; sd = &vol->v_subdisks[disk_pos]; sd->sd_disk = disk; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); } /* * Make all disks found till the moment take their places * in order of their generation numbers. */ do { best = NULL; bestgendiff = 0xffffffff; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_RAID_DISK_S_NONE) continue; pd = disk->d_md_data; if (pd->pd_meta == NULL) gendiff = 0xfffffffe; else gendiff = meta->generation - pd->pd_meta->generation; if (gendiff < bestgendiff) { best = disk; bestgendiff = gendiff; } } if (best != NULL) g_raid_md_sii_start_disk(best); } while (best != NULL); mdi->mdio_started = 1; G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_sii(md, NULL, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. 
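The start routine above derives the per-subdisk size from the volume size according to the SiI array type, and then attaches the known disks in order of smallest unsigned generation distance so that stale disks are adopted last. A sketch of the size rule only, with invented type names in place of the SII_T_* codes; CONCAT/JBOD sizes stay zero because they are recomputed later from the real disk sizes.

#include <stdint.h>
#include <stdio.h>

enum sii_type { T_RAID0, T_RAID1, T_RAID01, T_CONCAT, T_RAID5, T_JBOD };

static uint64_t
subdisk_size(enum sii_type type, uint64_t volsize, unsigned disks)
{

    switch (type) {
    case T_RAID0:
        return (volsize / disks);
    case T_RAID1:
        return (volsize);
    case T_RAID01:
        return (volsize / (disks / 2));
    case T_RAID5:
        return (volsize / (disks - 1));
    case T_CONCAT:
    case T_JBOD:
    default:
        return (0);
    }
}

int
main(void)
{
    /* 4-disk SII_T_RAID01 volume: each subdisk covers volume size / (disks / 2). */
    printf("%ju\n", (uintmax_t)subdisk_size(T_RAID01, 2000ULL << 30, 4));
    return (0);
}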
*/ g_raid_md_sii_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } static void g_raid_md_sii_new_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_sii_object *mdi; struct sii_raid_conf *pdmeta; struct g_raid_md_sii_perdisk *pd; sc = disk->d_softc; md = sc->sc_md; mdi = (struct g_raid_md_sii_object *)md; pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data; pdmeta = pd->pd_meta; if (mdi->mdio_started) { if (g_raid_md_sii_start_disk(disk)) g_raid_md_write_sii(md, NULL, NULL, NULL); } else { if (mdi->mdio_meta == NULL || ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) { G_RAID_DEBUG1(1, sc, "Newer disk"); if (mdi->mdio_meta != NULL) free(mdi->mdio_meta, M_MD_SII); mdi->mdio_meta = sii_meta_copy(pdmeta); mdi->mdio_generation = mdi->mdio_meta->generation; mdi->mdio_total_disks = sii_meta_total_disks(pdmeta); mdi->mdio_disks_present = 1; } else if (pdmeta->generation == mdi->mdio_generation) { mdi->mdio_disks_present++; G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", mdi->mdio_disks_present, mdi->mdio_total_disks); } else { G_RAID_DEBUG1(1, sc, "Older disk"); } /* If we collected all needed disks - start array. */ if (mdi->mdio_disks_present == mdi->mdio_total_disks) g_raid_md_sii_start(sc); } } static void g_raid_sii_go(void *arg) { struct g_raid_softc *sc; struct g_raid_md_object *md; struct g_raid_md_sii_object *mdi; sc = arg; md = sc->sc_md; mdi = (struct g_raid_md_sii_object *)md; if (!mdi->mdio_started) { G_RAID_DEBUG1(0, sc, "Force array start due to timeout."); g_raid_event_send(sc, G_RAID_NODE_E_START, 0); } } static int g_raid_md_create_sii(struct g_raid_md_object *md, struct g_class *mp, struct g_geom **gp) { struct g_raid_softc *sc; struct g_raid_md_sii_object *mdi; char name[32]; mdi = (struct g_raid_md_sii_object *)md; mdi->mdio_timestamp[5] = arc4random(); mdi->mdio_timestamp[4] = arc4random(); mdi->mdio_timestamp[3] = arc4random(); mdi->mdio_timestamp[2] = arc4random(); mdi->mdio_timestamp[1] = arc4random(); mdi->mdio_timestamp[0] = arc4random(); mdi->mdio_location = arc4random(); mdi->mdio_generation = 0; snprintf(name, sizeof(name), "SiI-%02x%02x%02x%02x%02x%02x", mdi->mdio_timestamp[5], mdi->mdio_timestamp[4], mdi->mdio_timestamp[3], mdi->mdio_timestamp[2], mdi->mdio_timestamp[1], mdi->mdio_timestamp[0]); sc = g_raid_create_node(mp, name, md); if (sc == NULL) return (G_RAID_MD_TASTE_FAIL); md->mdo_softc = sc; *gp = sc->sc_geom; return (G_RAID_MD_TASTE_NEW); } static int g_raid_md_taste_sii(struct g_raid_md_object *md, struct g_class *mp, struct g_consumer *cp, struct g_geom **gp) { struct g_consumer *rcp; struct g_provider *pp; struct g_raid_md_sii_object *mdi, *mdi1; struct g_raid_softc *sc; struct g_raid_disk *disk; struct sii_raid_conf *meta; struct g_raid_md_sii_perdisk *pd; struct g_geom *geom; int disk_pos, result, spare, len; char name[32]; uint16_t vendor; G_RAID_DEBUG(1, "Tasting SiI on %s", cp->provider->name); mdi = (struct g_raid_md_sii_object *)md; pp = cp->provider; /* Read metadata from device. 
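g_raid_md_sii_new_disk() above decides whether a disk carries newer metadata with (int32_t)(a - b) > 0, a serial-number style comparison that keeps working after the 32-bit generation counter wraps. A tiny standalone check of that behaviour; it assumes the usual two's-complement narrowing conversion, which the kernel code relies on as well.

#include <assert.h>
#include <stdint.h>

/* Is generation a "newer" than b, allowing for 32-bit wraparound? */
static int
gen_newer(uint32_t a, uint32_t b)
{
    return ((int32_t)(a - b) > 0);
}

int
main(void)
{
    assert(gen_newer(5, 4));
    assert(!gen_newer(4, 5));
    /* Wraparound: generation 1 is "newer" than 0xfffffffe. */
    assert(gen_newer(1, 0xfffffffeU));
    return (0);
}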
*/ meta = NULL; g_topology_unlock(); vendor = 0xffff; len = sizeof(vendor); if (pp->geom->rank == 1) g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); meta = sii_meta_read(cp); g_topology_lock(); if (meta == NULL) { if (g_raid_aggressive_spare) { if (vendor == 0x1095) { G_RAID_DEBUG(1, "No SiI metadata, forcing spare."); spare = 2; goto search; } else { G_RAID_DEBUG(1, "SiI vendor mismatch 0x%04x != 0x1095", vendor); } } return (G_RAID_MD_TASTE_FAIL); } /* Check this disk position in obtained metadata. */ disk_pos = sii_meta_disk_pos(meta, meta); if (disk_pos == -1) { G_RAID_DEBUG(1, "SiI disk position not found"); goto fail1; } /* Metadata valid. Print it. */ g_raid_md_sii_print(meta); G_RAID_DEBUG(1, "SiI disk position %d", disk_pos); spare = (meta->type == SII_T_SPARE) ? 1 : 0; search: /* Search for matching node. */ sc = NULL; mdi1 = NULL; LIST_FOREACH(geom, &mp->geom, geom) { sc = geom->softc; if (sc == NULL) continue; if (sc->sc_stopping != 0) continue; if (sc->sc_md->mdo_class != md->mdo_class) continue; mdi1 = (struct g_raid_md_sii_object *)sc->sc_md; if (spare) { if (mdi1->mdio_incomplete) break; } else { if (mdi1->mdio_location == meta->raid_location && memcmp(&mdi1->mdio_timestamp, &meta->timestamp, 6) == 0) break; } } /* Found matching node. */ if (geom != NULL) { G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); result = G_RAID_MD_TASTE_EXISTING; } else if (spare) { /* Not found needy node -- left for later. */ G_RAID_DEBUG(1, "Spare is not needed at this time"); goto fail1; } else { /* Not found matching node -- create one. */ result = G_RAID_MD_TASTE_NEW; memcpy(&mdi->mdio_timestamp, &meta->timestamp, 6); mdi->mdio_location = meta->raid_location; snprintf(name, sizeof(name), "SiI-%02x%02x%02x%02x%02x%02x", mdi->mdio_timestamp[5], mdi->mdio_timestamp[4], mdi->mdio_timestamp[3], mdi->mdio_timestamp[2], mdi->mdio_timestamp[1], mdi->mdio_timestamp[0]); sc = g_raid_create_node(mp, name, md); md->mdo_softc = sc; geom = sc->sc_geom; callout_init(&mdi->mdio_start_co, 1); callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz, g_raid_sii_go, sc); mdi->mdio_rootmount = root_mount_hold("GRAID-SiI"); G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount); } /* There is no return after this point, so we close passed consumer. */ g_access(cp, -1, 0, 0); rcp = g_new_consumer(geom); rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; g_topology_unlock(); sx_xlock(&sc->sc_lock); pd = malloc(sizeof(*pd), M_MD_SII, M_WAITOK | M_ZERO); pd->pd_meta = meta; if (spare == 2) { pd->pd_disk_pos = -3; } else { pd->pd_disk_pos = -1; } pd->pd_disk_size = pp->mediasize; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = rcp; rcp->private = disk; g_raid_get_disk_info(disk); g_raid_md_sii_new_disk(disk); sx_xunlock(&sc->sc_lock); g_topology_lock(); *gp = geom; return (result); fail1: free(meta, M_MD_SII); return (G_RAID_MD_TASTE_FAIL); } static int g_raid_md_event_sii(struct g_raid_md_object *md, struct g_raid_disk *disk, u_int event) { struct g_raid_softc *sc; struct g_raid_subdisk *sd; struct g_raid_md_sii_object *mdi; struct g_raid_md_sii_perdisk *pd; sc = md->mdo_softc; mdi = (struct g_raid_md_sii_object *)md; if (disk == NULL) { switch (event) { case G_RAID_NODE_E_START: if (!mdi->mdio_started) g_raid_md_sii_start(sc); return (0); } return (-1); } pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data; switch (event) { case G_RAID_DISK_E_DISCONNECTED: /* If disk was assigned, just update statuses. 
*/ if (pd->pd_disk_pos >= 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); if (disk->d_consumer) { g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; } TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); } } else { /* Otherwise -- delete. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); } /* Write updated metadata to all disks. */ g_raid_md_write_sii(md, NULL, NULL, NULL); /* Check if anything left except placeholders. */ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_sii_refill(sc); return (0); } return (-2); } static int g_raid_md_ctl_sii(struct g_raid_md_object *md, struct gctl_req *req) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_sii_object *mdi; struct g_raid_md_sii_perdisk *pd; struct g_consumer *cp; struct g_provider *pp; char arg[16]; const char *verb, *volname, *levelname, *diskname; int *nargs, *force; off_t size, sectorsize, strip; intmax_t *sizearg, *striparg; int numdisks, i, len, level, qual, update; int error; sc = md->mdo_softc; mdi = (struct g_raid_md_sii_object *)md; verb = gctl_get_param(req, "verb", NULL); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); error = 0; if (strcmp(verb, "label") == 0) { if (*nargs < 4) { gctl_error(req, "Invalid number of arguments."); return (-1); } volname = gctl_get_asciiparam(req, "arg1"); if (volname == NULL) { gctl_error(req, "No volume name."); return (-2); } levelname = gctl_get_asciiparam(req, "arg2"); if (levelname == NULL) { gctl_error(req, "No RAID level."); return (-3); } if (strcasecmp(levelname, "RAID5") == 0) levelname = "RAID5-LS"; if (g_raid_volume_str2level(levelname, &level, &qual)) { gctl_error(req, "Unknown RAID level '%s'.", levelname); return (-4); } numdisks = *nargs - 3; force = gctl_get_paraml(req, "force", sizeof(*force)); if (!g_raid_md_sii_supported(level, qual, numdisks, force ? *force : 0)) { gctl_error(req, "Unsupported RAID level " "(0x%02x/0x%02x), or number of disks (%d).", level, qual, numdisks); return (-5); } /* Search for disks, connect them and probe. */ size = 0x7fffffffffffffffllu; sectorsize = 0; for (i = 0; i < numdisks; i++) { snprintf(arg, sizeof(arg), "arg%d", i + 3); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -6; break; } if (strcmp(diskname, "NONE") == 0) { cp = NULL; pp = NULL; } else { g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open '%s'.", diskname); g_topology_unlock(); error = -7; break; } pp = cp->provider; } pd = malloc(sizeof(*pd), M_MD_SII, M_WAITOK | M_ZERO); pd->pd_disk_pos = i; disk = g_raid_create_disk(sc); disk->d_md_data = (void *)pd; disk->d_consumer = cp; if (cp == NULL) continue; cp->private = disk; g_topology_unlock(); g_raid_get_disk_info(disk); pd->pd_disk_size = pp->mediasize; if (size > pp->mediasize) size = pp->mediasize; if (sectorsize < pp->sectorsize) sectorsize = pp->sectorsize; } if (error != 0) return (error); if (sectorsize <= 0) { gctl_error(req, "Can't get sector size."); return (-8); } /* Reserve space for metadata. */ size -= 0x800 * sectorsize; /* Handle size argument. 
*/ len = sizeof(*sizearg); sizearg = gctl_get_param(req, "size", &len); if (sizearg != NULL && len == sizeof(*sizearg) && *sizearg > 0) { if (*sizearg > size) { gctl_error(req, "Size too big %lld > %lld.", (long long)*sizearg, (long long)size); return (-9); } size = *sizearg; } /* Handle strip argument. */ strip = 131072; len = sizeof(*striparg); striparg = gctl_get_param(req, "strip", &len); if (striparg != NULL && len == sizeof(*striparg) && *striparg > 0) { if (*striparg < sectorsize) { gctl_error(req, "Strip size too small."); return (-10); } if (*striparg % sectorsize != 0) { gctl_error(req, "Incorrect strip size."); return (-11); } if (strip > 65535 * sectorsize) { gctl_error(req, "Strip size too big."); return (-12); } strip = *striparg; } /* Round size down to strip or sector. */ if (level == G_RAID_VOLUME_RL_RAID1) size -= (size % sectorsize); else if (level == G_RAID_VOLUME_RL_RAID1E && (numdisks & 1) != 0) size -= (size % (2 * strip)); else size -= (size % strip); if (size <= 0) { gctl_error(req, "Size too small."); return (-13); } if (size > 0xffffffffffffllu * sectorsize) { gctl_error(req, "Size too big."); return (-14); } /* We have all we need, create things: volume, ... */ mdi->mdio_total_disks = numdisks; mdi->mdio_started = 1; vol = g_raid_create_volume(sc, volname, -1); vol->v_md_data = (void *)(intptr_t)0; vol->v_raid_level = level; vol->v_raid_level_qualifier = qual; vol->v_strip_size = strip; vol->v_disks_count = numdisks; if (level == G_RAID_VOLUME_RL_RAID0 || level == G_RAID_VOLUME_RL_CONCAT || level == G_RAID_VOLUME_RL_SINGLE) vol->v_mediasize = size * numdisks; else if (level == G_RAID_VOLUME_RL_RAID1) vol->v_mediasize = size; else if (level == G_RAID_VOLUME_RL_RAID5) vol->v_mediasize = size * (numdisks - 1); else { /* RAID1E */ vol->v_mediasize = ((size * numdisks) / strip / 2) * strip; } vol->v_sectorsize = sectorsize; g_raid_start_volume(vol); /* , and subdisks. */ TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data; sd = &vol->v_subdisks[pd->pd_disk_pos]; sd->sd_disk = disk; sd->sd_offset = 0; sd->sd_size = size; TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); if (sd->sd_disk->d_consumer != NULL) { g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, G_RAID_EVENT_SUBDISK); } else { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); } } /* Write metadata based on created entities. */ G_RAID_DEBUG1(0, sc, "Array started."); g_raid_md_write_sii(md, NULL, NULL, NULL); /* Pickup any STALE/SPARE disks to refill array if needed. */ g_raid_md_sii_refill(sc); g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); return (0); } if (strcmp(verb, "delete") == 0) { /* Check if some volume is still open. 
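In the label handler above, the per-disk usable size (already reduced by the 0x800-sector metadata reservation) is rounded down to a sector, strip, or double-strip boundary depending on the level, and the volume size then follows from it. A sketch of both rules, with stand-in level names for the G_RAID_VOLUME_RL_* constants:

#include <stdint.h>
#include <stdio.h>

enum lvl { L_RAID0, L_RAID1, L_RAID1E, L_RAID5, L_CONCAT, L_SINGLE };

/* Trim the per-disk size to the boundary required by the level. */
static uint64_t
round_disk_size(enum lvl level, uint64_t size, uint64_t sectorsize,
    uint64_t strip, unsigned numdisks)
{

    if (level == L_RAID1)
        return (size - size % sectorsize);
    if (level == L_RAID1E && (numdisks & 1) != 0)
        return (size - size % (2 * strip));
    return (size - size % strip);
}

/* Volume size as a function of the rounded per-disk size. */
static uint64_t
volume_size(enum lvl level, uint64_t size, uint64_t strip, unsigned numdisks)
{

    switch (level) {
    case L_RAID0:
    case L_CONCAT:
    case L_SINGLE:
        return (size * numdisks);
    case L_RAID1:
        return (size);
    case L_RAID5:
        return (size * (numdisks - 1));
    case L_RAID1E:
    default:
        return (((size * numdisks) / strip / 2) * strip);
    }
}

int
main(void)
{
    uint64_t size = round_disk_size(L_RAID1E, 1000204886016ULL, 512, 131072, 3);

    printf("rounded %ju, volume %ju\n", (uintmax_t)size,
        (uintmax_t)volume_size(L_RAID1E, size, 131072, 3));
    return (0);
}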
*/ force = gctl_get_paraml(req, "force", sizeof(*force)); if (force != NULL && *force == 0 && g_raid_nopens(sc) != 0) { gctl_error(req, "Some volume is still open."); return (-4); } TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer) sii_meta_erase(disk->d_consumer); } g_raid_destroy_node(sc, 0); return (0); } if (strcmp(verb, "remove") == 0 || strcmp(verb, "fail") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } for (i = 1; i < *nargs; i++) { snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -2; break; } if (strncmp(diskname, "/dev/", 5) == 0) diskname += 5; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL && strcmp(disk->d_consumer->provider->name, diskname) == 0) break; } if (disk == NULL) { gctl_error(req, "Disk '%s' not found.", diskname); error = -3; break; } if (strcmp(verb, "fail") == 0) { g_raid_md_fail_disk_sii(md, NULL, disk); continue; } pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data; /* Erase metadata on deleting disk. */ sii_meta_erase(disk->d_consumer); /* If disk was assigned, just update statuses. */ if (pd->pd_disk_pos >= 0) { g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); } } else { /* Otherwise -- delete. */ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); g_raid_destroy_disk(disk); } } /* Write updated metadata to remaining disks. */ g_raid_md_write_sii(md, NULL, NULL, NULL); /* Check if anything left except placeholders. */ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_sii_refill(sc); return (error); } if (strcmp(verb, "insert") == 0) { if (*nargs < 2) { gctl_error(req, "Invalid number of arguments."); return (-1); } update = 0; for (i = 1; i < *nargs; i++) { /* Get disk name. */ snprintf(arg, sizeof(arg), "arg%d", i); diskname = gctl_get_asciiparam(req, arg); if (diskname == NULL) { gctl_error(req, "No disk name (%s).", arg); error = -3; break; } /* Try to find provider with specified name. */ g_topology_lock(); cp = g_raid_open_consumer(sc, diskname); if (cp == NULL) { gctl_error(req, "Can't open disk '%s'.", diskname); g_topology_unlock(); error = -4; break; } pp = cp->provider; pd = malloc(sizeof(*pd), M_MD_SII, M_WAITOK | M_ZERO); pd->pd_disk_pos = -3; pd->pd_disk_size = pp->mediasize; disk = g_raid_create_disk(sc); disk->d_consumer = cp; disk->d_md_data = (void *)pd; cp->private = disk; g_topology_unlock(); g_raid_get_disk_info(disk); /* Welcome the "new" disk. */ update += g_raid_md_sii_start_disk(disk); if (disk->d_state == G_RAID_DISK_S_SPARE) { sii_meta_write_spare(cp); g_raid_destroy_disk(disk); } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) { gctl_error(req, "Disk '%s' doesn't fit.", diskname); g_raid_destroy_disk(disk); error = -8; break; } } /* Write new metadata if we changed something. 
*/ if (update) g_raid_md_write_sii(md, NULL, NULL, NULL); return (error); } gctl_error(req, "Command '%s' is not supported.", verb); return (-100); } static int g_raid_md_write_sii(struct g_raid_md_object *md, struct g_raid_volume *tvol, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; struct g_raid_md_sii_object *mdi; struct g_raid_md_sii_perdisk *pd; struct sii_raid_conf *meta; u_int i; sc = md->mdo_softc; mdi = (struct g_raid_md_sii_object *)md; if (sc->sc_stopping == G_RAID_DESTROY_HARD) return (0); /* Bump generation. Newly written metadata may differ from previous. */ mdi->mdio_generation++; /* There is only one volume. */ vol = TAILQ_FIRST(&sc->sc_volumes); /* Fill global fields. */ meta = malloc(sizeof(*meta), M_MD_SII, M_WAITOK | M_ZERO); if (mdi->mdio_meta) memcpy(meta, mdi->mdio_meta, sizeof(*meta)); meta->total_sectors = vol->v_mediasize / vol->v_sectorsize; meta->vendor_id = 0x1095; meta->version_minor = 0; meta->version_major = 2; memcpy(&meta->timestamp, &mdi->mdio_timestamp, 6); meta->strip_sectors = vol->v_strip_size / vol->v_sectorsize; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) { meta->type = SII_T_RAID0; meta->raid0_disks = vol->v_disks_count; meta->raid1_disks = 0xff; } else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) { meta->type = SII_T_RAID1; meta->raid0_disks = 0xff; meta->raid1_disks = vol->v_disks_count; } else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { meta->type = SII_T_RAID01; meta->raid0_disks = vol->v_disks_count / 2; meta->raid1_disks = 2; } else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT || vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE) { meta->type = SII_T_JBOD; meta->raid0_disks = vol->v_disks_count; meta->raid1_disks = 0xff; } else { meta->type = SII_T_RAID5; meta->raid0_disks = vol->v_disks_count; meta->raid1_disks = 0xff; } meta->generation = mdi->mdio_generation; meta->raid_status = vol->v_dirty ? SII_S_ONLINE : SII_S_AVAILABLE; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state == G_RAID_SUBDISK_S_STALE || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) meta->raid_status = SII_S_ONLINE; } meta->raid_location = mdi->mdio_location; sii_meta_put_name(meta, vol->v_name); /* We are done. Print meta data and store them to disks. 
*/ if (mdi->mdio_meta != NULL) free(mdi->mdio_meta, M_MD_SII); mdi->mdio_meta = meta; TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data; if (disk->d_state != G_RAID_DISK_S_ACTIVE) continue; if (pd->pd_meta != NULL) { free(pd->pd_meta, M_MD_SII); pd->pd_meta = NULL; } pd->pd_meta = sii_meta_copy(meta); if ((sd = TAILQ_FIRST(&disk->d_subdisks)) != NULL) { if (sd->sd_state < G_RAID_SUBDISK_S_NEW) pd->pd_meta->disk_status = SII_S_DROPPED; else if (sd->sd_state < G_RAID_SUBDISK_S_STALE) { pd->pd_meta->disk_status = SII_S_REBUILD; pd->pd_meta->rebuild_lba = sd->sd_rebuild_pos / vol->v_sectorsize; } else pd->pd_meta->disk_status = SII_S_CURRENT; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) { pd->pd_meta->disk_number = sd->sd_pos; pd->pd_meta->raid0_ident = 0xff; pd->pd_meta->raid1_ident = 0; } else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { pd->pd_meta->disk_number = sd->sd_pos / meta->raid1_disks; pd->pd_meta->raid0_ident = sd->sd_pos % meta->raid1_disks; pd->pd_meta->raid1_ident = sd->sd_pos / meta->raid1_disks; } else { pd->pd_meta->disk_number = sd->sd_pos; pd->pd_meta->raid0_ident = 0; pd->pd_meta->raid1_ident = 0xff; } } G_RAID_DEBUG(1, "Writing SiI metadata to %s", g_raid_get_diskname(disk)); g_raid_md_sii_print(pd->pd_meta); sii_meta_write(disk->d_consumer, pd->pd_meta); } return (0); } static int g_raid_md_fail_disk_sii(struct g_raid_md_object *md, struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) { struct g_raid_softc *sc; struct g_raid_md_sii_perdisk *pd; struct g_raid_subdisk *sd; sc = md->mdo_softc; pd = (struct g_raid_md_sii_perdisk *)tdisk->d_md_data; /* We can't fail disk that is not a part of array now. */ if (pd->pd_disk_pos < 0) return (-1); /* * Mark disk as failed in metadata and try to write that metadata * to the disk itself to prevent it's later resurrection as STALE. */ if (tdisk->d_consumer) { if (pd->pd_meta) { pd->pd_meta->disk_status = SII_S_REMOVED; sii_meta_write(tdisk->d_consumer, pd->pd_meta); } else sii_meta_erase(tdisk->d_consumer); } /* Change states. */ g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED); g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, G_RAID_EVENT_SUBDISK); } /* Write updated metadata to remaining disks. */ g_raid_md_write_sii(md, NULL, NULL, tdisk); /* Check if anything left except placeholders. 
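For RAID1E volumes the write path above encodes each subdisk position as a (disk_number, raid0_ident, raid1_ident) triple with raid1_disks set to 2, and sii_meta_disk_pos() earlier in this file rebuilds the flat position as raid1_ident * raid1_disks + raid0_ident. A round-trip sketch of that encoding with hypothetical helper names:

#include <assert.h>

struct sii_pos {
    int disk_number;    /* sd_pos / raid1_disks */
    int raid0_ident;    /* sd_pos % raid1_disks */
    int raid1_ident;    /* sd_pos / raid1_disks, as stored on disk */
};

static struct sii_pos
encode(int sd_pos, int raid1_disks)
{
    struct sii_pos p = {
        .disk_number = sd_pos / raid1_disks,
        .raid0_ident = sd_pos % raid1_disks,
        .raid1_ident = sd_pos / raid1_disks,
    };
    return (p);
}

static int
decode(struct sii_pos p, int raid1_disks)
{
    return (p.raid1_ident * raid1_disks + p.raid0_ident);
}

int
main(void)
{
    int pos;

    /* 8 disks arranged as mirror pairs of two: the encoding round-trips. */
    for (pos = 0; pos < 8; pos++)
        assert(decode(encode(pos, 2), 2) == pos);
    return (0);
}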
*/ if (g_raid_ndisks(sc, -1) == g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) g_raid_destroy_node(sc, 0); else g_raid_md_sii_refill(sc); return (0); } static int g_raid_md_free_disk_sii(struct g_raid_md_object *md, struct g_raid_disk *disk) { struct g_raid_md_sii_perdisk *pd; pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data; if (pd->pd_meta != NULL) { free(pd->pd_meta, M_MD_SII); pd->pd_meta = NULL; } free(pd, M_MD_SII); disk->d_md_data = NULL; return (0); } static int g_raid_md_free_sii(struct g_raid_md_object *md) { struct g_raid_md_sii_object *mdi; mdi = (struct g_raid_md_sii_object *)md; if (!mdi->mdio_started) { mdi->mdio_started = 0; callout_stop(&mdi->mdio_start_co); G_RAID_DEBUG1(1, md->mdo_softc, "root_mount_rel %p", mdi->mdio_rootmount); root_mount_rel(mdi->mdio_rootmount); mdi->mdio_rootmount = NULL; } if (mdi->mdio_meta != NULL) { free(mdi->mdio_meta, M_MD_SII); mdi->mdio_meta = NULL; } return (0); } G_RAID_MD_DECLARE(sii, "SiI"); Index: head/sys/geom/raid/tr_concat.c =================================================================== --- head/sys/geom/raid/tr_concat.c (revision 350693) +++ head/sys/geom/raid/tr_concat.c (revision 350694) @@ -1,355 +1,356 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_tr_if.h" static MALLOC_DEFINE(M_TR_CONCAT, "tr_concat_data", "GEOM_RAID CONCAT data"); struct g_raid_tr_concat_object { struct g_raid_tr_object trso_base; int trso_starting; int trso_stopped; }; static g_raid_tr_taste_t g_raid_tr_taste_concat; static g_raid_tr_event_t g_raid_tr_event_concat; static g_raid_tr_start_t g_raid_tr_start_concat; static g_raid_tr_stop_t g_raid_tr_stop_concat; static g_raid_tr_iostart_t g_raid_tr_iostart_concat; static g_raid_tr_iodone_t g_raid_tr_iodone_concat; static g_raid_tr_kerneldump_t g_raid_tr_kerneldump_concat; static g_raid_tr_free_t g_raid_tr_free_concat; static kobj_method_t g_raid_tr_concat_methods[] = { KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_concat), KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_concat), KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_concat), KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_concat), KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_concat), KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_concat), KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_concat), KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_concat), { 0, 0 } }; static struct g_raid_tr_class g_raid_tr_concat_class = { "CONCAT", g_raid_tr_concat_methods, sizeof(struct g_raid_tr_concat_object), .trc_enable = 1, .trc_priority = 50, .trc_accept_unmapped = 1 }; static int g_raid_tr_taste_concat(struct g_raid_tr_object *tr, struct g_raid_volume *volume) { struct g_raid_tr_concat_object *trs; trs = (struct g_raid_tr_concat_object *)tr; if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_SINGLE && tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_CONCAT && !(tr->tro_volume->v_disks_count == 1 && tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_UNKNOWN)) return (G_RAID_TR_TASTE_FAIL); trs->trso_starting = 1; return (G_RAID_TR_TASTE_SUCCEED); } static int g_raid_tr_update_state_concat(struct g_raid_volume *vol) { struct g_raid_tr_concat_object *trs; struct g_raid_softc *sc; off_t size; u_int s; int i, n, f; sc = vol->v_softc; trs = (struct g_raid_tr_concat_object *)vol->v_tr; if (trs->trso_stopped) s = G_RAID_VOLUME_S_STOPPED; else if (trs->trso_starting) s = G_RAID_VOLUME_S_STARTING; else { n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED); if (n + f == vol->v_disks_count) { if (f == 0) s = G_RAID_VOLUME_S_OPTIMAL; else s = G_RAID_VOLUME_S_SUBOPTIMAL; } else s = G_RAID_VOLUME_S_BROKEN; } if (s != vol->v_state) { /* * Some metadata modules may not know CONCAT volume * mediasize until all disks connected. Recalculate. */ if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT && G_RAID_VOLUME_S_ALIVE(s) && !G_RAID_VOLUME_S_ALIVE(vol->v_state)) { size = 0; for (i = 0; i < vol->v_disks_count; i++) { if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE) size += vol->v_subdisks[i].sd_size; } vol->v_mediasize = size; } g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 
G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, G_RAID_EVENT_VOLUME); g_raid_change_volume_state(vol, s); if (!trs->trso_starting && !trs->trso_stopped) g_raid_write_metadata(sc, vol, NULL, NULL); } return (0); } static int g_raid_tr_event_concat(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd, u_int event) { struct g_raid_tr_concat_object *trs; struct g_raid_softc *sc; struct g_raid_volume *vol; int state; trs = (struct g_raid_tr_concat_object *)tr; vol = tr->tro_volume; sc = vol->v_softc; state = sd->sd_state; if (state != G_RAID_SUBDISK_S_NONE && state != G_RAID_SUBDISK_S_FAILED && state != G_RAID_SUBDISK_S_ACTIVE) { G_RAID_DEBUG1(1, sc, "Promote subdisk %s:%d from %s to ACTIVE.", vol->v_name, sd->sd_pos, g_raid_subdisk_state2str(sd->sd_state)); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } if (state != sd->sd_state && !trs->trso_starting && !trs->trso_stopped) g_raid_write_metadata(sc, vol, sd, NULL); g_raid_tr_update_state_concat(vol); return (0); } static int g_raid_tr_start_concat(struct g_raid_tr_object *tr) { struct g_raid_tr_concat_object *trs; struct g_raid_volume *vol; trs = (struct g_raid_tr_concat_object *)tr; vol = tr->tro_volume; trs->trso_starting = 0; g_raid_tr_update_state_concat(vol); return (0); } static int g_raid_tr_stop_concat(struct g_raid_tr_object *tr) { struct g_raid_tr_concat_object *trs; struct g_raid_volume *vol; trs = (struct g_raid_tr_concat_object *)tr; vol = tr->tro_volume; trs->trso_starting = 0; trs->trso_stopped = 1; g_raid_tr_update_state_concat(vol); return (0); } static void g_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct bio_queue_head queue; struct bio *cbp; char *addr; off_t offset, length, remain; u_int no; vol = tr->tro_volume; if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) { g_raid_iodone(bp, EIO); return; } if (bp->bio_cmd == BIO_FLUSH) { g_raid_tr_flush_common(tr, bp); return; } offset = bp->bio_offset; remain = bp->bio_length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) addr = NULL; else addr = bp->bio_data; no = 0; while (no < vol->v_disks_count && offset >= vol->v_subdisks[no].sd_size) { offset -= vol->v_subdisks[no].sd_size; no++; } KASSERT(no < vol->v_disks_count, ("Request starts after volume end (%ju)", bp->bio_offset)); bioq_init(&queue); do { sd = &vol->v_subdisks[no]; length = MIN(sd->sd_size - offset, remain); cbp = g_clone_bio(bp); if (cbp == NULL) goto failure; cbp->bio_offset = offset; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0 && bp->bio_cmd != BIO_DELETE) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller1 = sd; bioq_insert_tail(&queue, cbp); remain -= length; if (bp->bio_cmd != BIO_DELETE) addr += length; offset = 0; no++; KASSERT(no < vol->v_disks_count || remain == 0, ("Request ends after volume end (%ju, %ju)", bp->bio_offset, bp->bio_length)); } while (remain > 0); while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); } static int g_raid_tr_kerneldump_concat(struct g_raid_tr_object *tr, void *virtual, vm_offset_t physical, off_t 
boffset, size_t blength) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; char *addr; off_t offset, length, remain; int error, no; vol = tr->tro_volume; if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) return (ENXIO); offset = boffset; remain = blength; addr = virtual; no = 0; while (no < vol->v_disks_count && offset >= vol->v_subdisks[no].sd_size) { offset -= vol->v_subdisks[no].sd_size; no++; } KASSERT(no < vol->v_disks_count, ("Request starts after volume end (%ju)", boffset)); do { sd = &vol->v_subdisks[no]; length = MIN(sd->sd_size - offset, remain); error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], addr, 0, offset, length); if (error != 0) return (error); remain -= length; addr += length; offset = 0; no++; KASSERT(no < vol->v_disks_count || remain == 0, ("Request ends after volume end (%ju, %zu)", boffset, blength)); } while (remain > 0); return (0); } static void g_raid_tr_iodone_concat(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd,struct bio *bp) { struct bio *pbp; pbp = bp->bio_parent; if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { pbp->bio_completed = pbp->bio_length; g_raid_iodone(pbp, pbp->bio_error); } } static int g_raid_tr_free_concat(struct g_raid_tr_object *tr) { return (0); } G_RAID_TR_DECLARE(concat, "CONCAT"); Index: head/sys/geom/raid/tr_raid0.c =================================================================== --- head/sys/geom/raid/tr_raid0.c (revision 350693) +++ head/sys/geom/raid/tr_raid0.c (revision 350694) @@ -1,337 +1,338 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_tr_if.h" static MALLOC_DEFINE(M_TR_RAID0, "tr_raid0_data", "GEOM_RAID RAID0 data"); struct g_raid_tr_raid0_object { struct g_raid_tr_object trso_base; int trso_starting; int trso_stopped; }; static g_raid_tr_taste_t g_raid_tr_taste_raid0; static g_raid_tr_event_t g_raid_tr_event_raid0; static g_raid_tr_start_t g_raid_tr_start_raid0; static g_raid_tr_stop_t g_raid_tr_stop_raid0; static g_raid_tr_iostart_t g_raid_tr_iostart_raid0; static g_raid_tr_iodone_t g_raid_tr_iodone_raid0; static g_raid_tr_kerneldump_t g_raid_tr_kerneldump_raid0; static g_raid_tr_free_t g_raid_tr_free_raid0; static kobj_method_t g_raid_tr_raid0_methods[] = { KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_raid0), KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_raid0), KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_raid0), KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_raid0), KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_raid0), KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_raid0), KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_raid0), KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_raid0), { 0, 0 } }; static struct g_raid_tr_class g_raid_tr_raid0_class = { "RAID0", g_raid_tr_raid0_methods, sizeof(struct g_raid_tr_raid0_object), .trc_enable = 1, .trc_priority = 100, .trc_accept_unmapped = 1 }; static int g_raid_tr_taste_raid0(struct g_raid_tr_object *tr, struct g_raid_volume *volume) { struct g_raid_tr_raid0_object *trs; trs = (struct g_raid_tr_raid0_object *)tr; if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_RAID0 || tr->tro_volume->v_raid_level_qualifier != G_RAID_VOLUME_RLQ_NONE) return (G_RAID_TR_TASTE_FAIL); trs->trso_starting = 1; return (G_RAID_TR_TASTE_SUCCEED); } static int g_raid_tr_update_state_raid0(struct g_raid_volume *vol) { struct g_raid_tr_raid0_object *trs; struct g_raid_softc *sc; u_int s; int n, f; sc = vol->v_softc; trs = (struct g_raid_tr_raid0_object *)vol->v_tr; if (trs->trso_stopped) s = G_RAID_VOLUME_S_STOPPED; else if (trs->trso_starting) s = G_RAID_VOLUME_S_STARTING; else { n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED); if (n + f == vol->v_disks_count) { if (f == 0) s = G_RAID_VOLUME_S_OPTIMAL; else s = G_RAID_VOLUME_S_SUBOPTIMAL; } else s = G_RAID_VOLUME_S_BROKEN; } if (s != vol->v_state) { g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? 
G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, G_RAID_EVENT_VOLUME); g_raid_change_volume_state(vol, s); if (!trs->trso_starting && !trs->trso_stopped) g_raid_write_metadata(sc, vol, NULL, NULL); } return (0); } static int g_raid_tr_event_raid0(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd, u_int event) { struct g_raid_tr_raid0_object *trs; struct g_raid_softc *sc; struct g_raid_volume *vol; int state; trs = (struct g_raid_tr_raid0_object *)tr; vol = tr->tro_volume; sc = vol->v_softc; state = sd->sd_state; if (state != G_RAID_SUBDISK_S_NONE && state != G_RAID_SUBDISK_S_FAILED && state != G_RAID_SUBDISK_S_ACTIVE) { G_RAID_DEBUG1(1, sc, "Promote subdisk %s:%d from %s to ACTIVE.", vol->v_name, sd->sd_pos, g_raid_subdisk_state2str(sd->sd_state)); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); } if (state != sd->sd_state && !trs->trso_starting && !trs->trso_stopped) g_raid_write_metadata(sc, vol, sd, NULL); g_raid_tr_update_state_raid0(vol); return (0); } static int g_raid_tr_start_raid0(struct g_raid_tr_object *tr) { struct g_raid_tr_raid0_object *trs; struct g_raid_volume *vol; trs = (struct g_raid_tr_raid0_object *)tr; vol = tr->tro_volume; trs->trso_starting = 0; g_raid_tr_update_state_raid0(vol); return (0); } static int g_raid_tr_stop_raid0(struct g_raid_tr_object *tr) { struct g_raid_tr_raid0_object *trs; struct g_raid_volume *vol; trs = (struct g_raid_tr_raid0_object *)tr; vol = tr->tro_volume; trs->trso_starting = 0; trs->trso_stopped = 1; g_raid_tr_update_state_raid0(vol); return (0); } static void g_raid_tr_iostart_raid0(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct bio_queue_head queue; struct bio *cbp; char *addr; off_t offset, start, length, nstripe, remain; u_int no, strip_size; vol = tr->tro_volume; if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) { g_raid_iodone(bp, EIO); return; } if (bp->bio_cmd == BIO_FLUSH) { g_raid_tr_flush_common(tr, bp); return; } if ((bp->bio_flags & BIO_UNMAPPED) != 0) addr = NULL; else addr = bp->bio_data; strip_size = vol->v_strip_size; /* Stripe number. */ nstripe = bp->bio_offset / strip_size; /* Start position in stripe. */ start = bp->bio_offset % strip_size; /* Disk number. */ no = nstripe % vol->v_disks_count; /* Stripe start position in disk. */ offset = (nstripe / vol->v_disks_count) * strip_size; /* Length of data to operate. 
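/*
 * Aside: a minimal sketch (hypothetical helper, not part of these sources)
 * of the RAID0 address math used by g_raid_tr_iostart_raid0() and the
 * kerneldump routine below: split a volume offset into strip number,
 * offset within the strip, the disk holding that strip, and the strip's
 * start offset on that disk.
 */
static void
raid0_map(struct g_raid_volume *vol, off_t voff, u_int *no, off_t *doff,
    off_t *start)
{
	off_t nstripe;
	u_int strip_size;

	strip_size = vol->v_strip_size;
	nstripe = voff / strip_size;		/* Stripe number. */
	*start = voff % strip_size;		/* Offset inside the strip. */
	*no = nstripe % vol->v_disks_count;	/* Disk holding the strip. */
	*doff = (nstripe / vol->v_disks_count) * strip_size;
}
/*
 * Worked example (assumed layout): strip_size = 64K, 4 disks,
 * voff = 300K -> nstripe = 4, start = 44K, disk 0, disk offset 64K.
 */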
*/ remain = bp->bio_length; bioq_init(&queue); do { length = MIN(strip_size - start, remain); cbp = g_clone_bio(bp); if (cbp == NULL) goto failure; cbp->bio_offset = offset + start; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0 && bp->bio_cmd != BIO_DELETE) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller1 = &vol->v_subdisks[no]; bioq_insert_tail(&queue, cbp); if (++no >= vol->v_disks_count) { no = 0; offset += strip_size; } remain -= length; if (bp->bio_cmd != BIO_DELETE) addr += length; start = 0; } while (remain > 0); while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); } static int g_raid_tr_kerneldump_raid0(struct g_raid_tr_object *tr, void *virtual, vm_offset_t physical, off_t boffset, size_t blength) { struct g_raid_volume *vol; char *addr; off_t offset, start, length, nstripe, remain; u_int no, strip_size; int error; vol = tr->tro_volume; if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) return (ENXIO); addr = virtual; strip_size = vol->v_strip_size; /* Stripe number. */ nstripe = boffset / strip_size; /* Start position in stripe. */ start = boffset % strip_size; /* Disk number. */ no = nstripe % vol->v_disks_count; /* Stripe tart position in disk. */ offset = (nstripe / vol->v_disks_count) * strip_size; /* Length of data to operate. */ remain = blength; do { length = MIN(strip_size - start, remain); error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], addr, 0, offset + start, length); if (error != 0) return (error); if (++no >= vol->v_disks_count) { no = 0; offset += strip_size; } remain -= length; addr += length; start = 0; } while (remain > 0); return (0); } static void g_raid_tr_iodone_raid0(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd,struct bio *bp) { struct bio *pbp; pbp = bp->bio_parent; if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { pbp->bio_completed = pbp->bio_length; g_raid_iodone(pbp, pbp->bio_error); } } static int g_raid_tr_free_raid0(struct g_raid_tr_object *tr) { return (0); } G_RAID_TR_DECLARE(raid0, "RAID0"); Index: head/sys/geom/raid/tr_raid1.c =================================================================== --- head/sys/geom/raid/tr_raid1.c (revision 350693) +++ head/sys/geom/raid/tr_raid1.c (revision 350694) @@ -1,986 +1,987 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_tr_if.h" SYSCTL_DECL(_kern_geom_raid_raid1); #define RAID1_REBUILD_SLAB (1 << 20) /* One transation in a rebuild */ static int g_raid1_rebuild_slab = RAID1_REBUILD_SLAB; SYSCTL_UINT(_kern_geom_raid_raid1, OID_AUTO, rebuild_slab_size, CTLFLAG_RWTUN, &g_raid1_rebuild_slab, 0, "Amount of the disk to rebuild each read/write cycle of the rebuild."); #define RAID1_REBUILD_FAIR_IO 20 /* use 1/x of the available I/O */ static int g_raid1_rebuild_fair_io = RAID1_REBUILD_FAIR_IO; SYSCTL_UINT(_kern_geom_raid_raid1, OID_AUTO, rebuild_fair_io, CTLFLAG_RWTUN, &g_raid1_rebuild_fair_io, 0, "Fraction of the I/O bandwidth to use when disk busy for rebuild."); #define RAID1_REBUILD_CLUSTER_IDLE 100 static int g_raid1_rebuild_cluster_idle = RAID1_REBUILD_CLUSTER_IDLE; SYSCTL_UINT(_kern_geom_raid_raid1, OID_AUTO, rebuild_cluster_idle, CTLFLAG_RWTUN, &g_raid1_rebuild_cluster_idle, 0, "Number of slabs to do each time we trigger a rebuild cycle"); #define RAID1_REBUILD_META_UPDATE 1024 /* update meta data every 1GB or so */ static int g_raid1_rebuild_meta_update = RAID1_REBUILD_META_UPDATE; SYSCTL_UINT(_kern_geom_raid_raid1, OID_AUTO, rebuild_meta_update, CTLFLAG_RWTUN, &g_raid1_rebuild_meta_update, 0, "When to update the meta data."); static MALLOC_DEFINE(M_TR_RAID1, "tr_raid1_data", "GEOM_RAID RAID1 data"); #define TR_RAID1_NONE 0 #define TR_RAID1_REBUILD 1 #define TR_RAID1_RESYNC 2 #define TR_RAID1_F_DOING_SOME 0x1 #define TR_RAID1_F_LOCKED 0x2 #define TR_RAID1_F_ABORT 0x4 struct g_raid_tr_raid1_object { struct g_raid_tr_object trso_base; int trso_starting; int trso_stopping; int trso_type; int trso_recover_slabs; /* slabs before rest */ int trso_fair_io; int trso_meta_update; int trso_flags; struct g_raid_subdisk *trso_failed_sd; /* like per volume */ void *trso_buffer; /* Buffer space */ struct bio trso_bio; }; static g_raid_tr_taste_t g_raid_tr_taste_raid1; static g_raid_tr_event_t g_raid_tr_event_raid1; static g_raid_tr_start_t g_raid_tr_start_raid1; static g_raid_tr_stop_t g_raid_tr_stop_raid1; static g_raid_tr_iostart_t g_raid_tr_iostart_raid1; static g_raid_tr_iodone_t g_raid_tr_iodone_raid1; static g_raid_tr_kerneldump_t g_raid_tr_kerneldump_raid1; static g_raid_tr_locked_t g_raid_tr_locked_raid1; static g_raid_tr_idle_t g_raid_tr_idle_raid1; static g_raid_tr_free_t g_raid_tr_free_raid1; static kobj_method_t g_raid_tr_raid1_methods[] = { KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_raid1), KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_raid1), KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_raid1), KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_raid1), 
KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_raid1), KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_raid1), KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_raid1), KOBJMETHOD(g_raid_tr_locked, g_raid_tr_locked_raid1), KOBJMETHOD(g_raid_tr_idle, g_raid_tr_idle_raid1), KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_raid1), { 0, 0 } }; static struct g_raid_tr_class g_raid_tr_raid1_class = { "RAID1", g_raid_tr_raid1_methods, sizeof(struct g_raid_tr_raid1_object), .trc_enable = 1, .trc_priority = 100, .trc_accept_unmapped = 1 }; static void g_raid_tr_raid1_rebuild_abort(struct g_raid_tr_object *tr); static void g_raid_tr_raid1_maybe_rebuild(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd); static int g_raid_tr_taste_raid1(struct g_raid_tr_object *tr, struct g_raid_volume *vol) { struct g_raid_tr_raid1_object *trs; trs = (struct g_raid_tr_raid1_object *)tr; if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_RAID1 || (tr->tro_volume->v_raid_level_qualifier != G_RAID_VOLUME_RLQ_R1SM && tr->tro_volume->v_raid_level_qualifier != G_RAID_VOLUME_RLQ_R1MM)) return (G_RAID_TR_TASTE_FAIL); trs->trso_starting = 1; return (G_RAID_TR_TASTE_SUCCEED); } static int g_raid_tr_update_state_raid1(struct g_raid_volume *vol, struct g_raid_subdisk *sd) { struct g_raid_tr_raid1_object *trs; struct g_raid_softc *sc; struct g_raid_subdisk *tsd, *bestsd; u_int s; int i, na, ns; sc = vol->v_softc; trs = (struct g_raid_tr_raid1_object *)vol->v_tr; if (trs->trso_stopping && (trs->trso_flags & TR_RAID1_F_DOING_SOME) == 0) s = G_RAID_VOLUME_S_STOPPED; else if (trs->trso_starting) s = G_RAID_VOLUME_S_STARTING; else { /* Make sure we have at least one ACTIVE disk. */ na = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); if (na == 0) { /* * Critical situation! We have no any active disk! * Choose the best disk we have to make it active. */ bestsd = &vol->v_subdisks[0]; for (i = 1; i < vol->v_disks_count; i++) { tsd = &vol->v_subdisks[i]; if (tsd->sd_state > bestsd->sd_state) bestsd = tsd; else if (tsd->sd_state == bestsd->sd_state && (tsd->sd_state == G_RAID_SUBDISK_S_REBUILD || tsd->sd_state == G_RAID_SUBDISK_S_RESYNC) && tsd->sd_rebuild_pos > bestsd->sd_rebuild_pos) bestsd = tsd; } if (bestsd->sd_state >= G_RAID_SUBDISK_S_UNINITIALIZED) { /* We found reasonable candidate. */ G_RAID_DEBUG1(1, sc, "Promote subdisk %s:%d from %s to ACTIVE.", vol->v_name, bestsd->sd_pos, g_raid_subdisk_state2str(bestsd->sd_state)); g_raid_change_subdisk_state(bestsd, G_RAID_SUBDISK_S_ACTIVE); g_raid_write_metadata(sc, vol, bestsd, bestsd->sd_disk); } } na = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); ns = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC); if (na == vol->v_disks_count) s = G_RAID_VOLUME_S_OPTIMAL; else if (na + ns == vol->v_disks_count) s = G_RAID_VOLUME_S_SUBOPTIMAL; else if (na > 0) s = G_RAID_VOLUME_S_DEGRADED; else s = G_RAID_VOLUME_S_BROKEN; g_raid_tr_raid1_maybe_rebuild(vol->v_tr, sd); } if (s != vol->v_state) { g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, G_RAID_EVENT_VOLUME); g_raid_change_volume_state(vol, s); if (!trs->trso_starting && !trs->trso_stopping) g_raid_write_metadata(sc, vol, NULL, NULL); } return (0); } static void g_raid_tr_raid1_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd, struct g_raid_disk *disk) { /* * We don't fail the last disk in the pack, since it still has decent * data on it and that's better than failing the disk if it is the root * file system. 
* * XXX should this be controlled via a tunable? It makes sense for * the volume that has / on it. I can't think of a case where we'd * want the volume to go away on this kind of event. */ if (g_raid_nsubdisks(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE) == 1 && g_raid_get_subdisk(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE) == sd) return; g_raid_fail_disk(sc, sd, disk); } static void g_raid_tr_raid1_rebuild_some(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1_object *trs; struct g_raid_subdisk *sd, *good_sd; struct bio *bp; trs = (struct g_raid_tr_raid1_object *)tr; if (trs->trso_flags & TR_RAID1_F_DOING_SOME) return; sd = trs->trso_failed_sd; good_sd = g_raid_get_subdisk(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE); if (good_sd == NULL) { g_raid_tr_raid1_rebuild_abort(tr); return; } bp = &trs->trso_bio; memset(bp, 0, sizeof(*bp)); bp->bio_offset = sd->sd_rebuild_pos; bp->bio_length = MIN(g_raid1_rebuild_slab, sd->sd_size - sd->sd_rebuild_pos); bp->bio_data = trs->trso_buffer; bp->bio_cmd = BIO_READ; bp->bio_cflags = G_RAID_BIO_FLAG_SYNC; bp->bio_caller1 = good_sd; trs->trso_flags |= TR_RAID1_F_DOING_SOME; trs->trso_flags |= TR_RAID1_F_LOCKED; g_raid_lock_range(sd->sd_volume, /* Lock callback starts I/O */ bp->bio_offset, bp->bio_length, NULL, bp); } static void g_raid_tr_raid1_rebuild_done(struct g_raid_tr_raid1_object *trs) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; vol = trs->trso_base.tro_volume; sd = trs->trso_failed_sd; g_raid_write_metadata(vol->v_softc, vol, sd, sd->sd_disk); free(trs->trso_buffer, M_TR_RAID1); trs->trso_buffer = NULL; trs->trso_flags &= ~TR_RAID1_F_DOING_SOME; trs->trso_type = TR_RAID1_NONE; trs->trso_recover_slabs = 0; trs->trso_failed_sd = NULL; g_raid_tr_update_state_raid1(vol, NULL); } static void g_raid_tr_raid1_rebuild_finish(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1_object *trs; struct g_raid_subdisk *sd; trs = (struct g_raid_tr_raid1_object *)tr; sd = trs->trso_failed_sd; G_RAID_DEBUG1(0, tr->tro_volume->v_softc, "Subdisk %s:%d-%s rebuild completed.", sd->sd_volume->v_name, sd->sd_pos, sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]"); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); sd->sd_rebuild_pos = 0; g_raid_tr_raid1_rebuild_done(trs); } static void g_raid_tr_raid1_rebuild_abort(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1_object *trs; struct g_raid_subdisk *sd; struct g_raid_volume *vol; off_t len; vol = tr->tro_volume; trs = (struct g_raid_tr_raid1_object *)tr; sd = trs->trso_failed_sd; if (trs->trso_flags & TR_RAID1_F_DOING_SOME) { G_RAID_DEBUG1(1, vol->v_softc, "Subdisk %s:%d-%s rebuild is aborting.", sd->sd_volume->v_name, sd->sd_pos, sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]"); trs->trso_flags |= TR_RAID1_F_ABORT; } else { G_RAID_DEBUG1(0, vol->v_softc, "Subdisk %s:%d-%s rebuild aborted.", sd->sd_volume->v_name, sd->sd_pos, sd->sd_disk ? 
g_raid_get_diskname(sd->sd_disk) : "[none]"); trs->trso_flags &= ~TR_RAID1_F_ABORT; if (trs->trso_flags & TR_RAID1_F_LOCKED) { trs->trso_flags &= ~TR_RAID1_F_LOCKED; len = MIN(g_raid1_rebuild_slab, sd->sd_size - sd->sd_rebuild_pos); g_raid_unlock_range(tr->tro_volume, sd->sd_rebuild_pos, len); } g_raid_tr_raid1_rebuild_done(trs); } } static void g_raid_tr_raid1_rebuild_start(struct g_raid_tr_object *tr) { struct g_raid_volume *vol; struct g_raid_tr_raid1_object *trs; struct g_raid_subdisk *sd, *fsd; vol = tr->tro_volume; trs = (struct g_raid_tr_raid1_object *)tr; if (trs->trso_failed_sd) { G_RAID_DEBUG1(1, vol->v_softc, "Already rebuild in start rebuild. pos %jd\n", (intmax_t)trs->trso_failed_sd->sd_rebuild_pos); return; } sd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_ACTIVE); if (sd == NULL) { G_RAID_DEBUG1(1, vol->v_softc, "No active disk to rebuild. night night."); return; } fsd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_RESYNC); if (fsd == NULL) fsd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_REBUILD); if (fsd == NULL) { fsd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_STALE); if (fsd != NULL) { fsd->sd_rebuild_pos = 0; g_raid_change_subdisk_state(fsd, G_RAID_SUBDISK_S_RESYNC); g_raid_write_metadata(vol->v_softc, vol, fsd, NULL); } else { fsd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_UNINITIALIZED); if (fsd == NULL) fsd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_NEW); if (fsd != NULL) { fsd->sd_rebuild_pos = 0; g_raid_change_subdisk_state(fsd, G_RAID_SUBDISK_S_REBUILD); g_raid_write_metadata(vol->v_softc, vol, fsd, NULL); } } } if (fsd == NULL) { G_RAID_DEBUG1(1, vol->v_softc, "No failed disk to rebuild. night night."); return; } trs->trso_failed_sd = fsd; G_RAID_DEBUG1(0, vol->v_softc, "Subdisk %s:%d-%s rebuild start at %jd.", fsd->sd_volume->v_name, fsd->sd_pos, fsd->sd_disk ? g_raid_get_diskname(fsd->sd_disk) : "[none]", trs->trso_failed_sd->sd_rebuild_pos); trs->trso_type = TR_RAID1_REBUILD; trs->trso_buffer = malloc(g_raid1_rebuild_slab, M_TR_RAID1, M_WAITOK); trs->trso_meta_update = g_raid1_rebuild_meta_update; g_raid_tr_raid1_rebuild_some(tr); } static void g_raid_tr_raid1_maybe_rebuild(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd) { struct g_raid_volume *vol; struct g_raid_tr_raid1_object *trs; int na, nr; /* * If we're stopping, don't do anything. If we don't have at least one * good disk and one bad disk, we don't do anything. And if there's a * 'good disk' stored in the trs, then we're in progress and we punt. * If we make it past all these checks, we need to rebuild. 
*/ vol = tr->tro_volume; trs = (struct g_raid_tr_raid1_object *)tr; if (trs->trso_stopping) return; na = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE); nr = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_REBUILD) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC); switch(trs->trso_type) { case TR_RAID1_NONE: if (na == 0) return; if (nr == 0) { nr = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_NEW) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED); if (nr == 0) return; } g_raid_tr_raid1_rebuild_start(tr); break; case TR_RAID1_REBUILD: if (na == 0 || nr == 0 || trs->trso_failed_sd == sd) g_raid_tr_raid1_rebuild_abort(tr); break; case TR_RAID1_RESYNC: break; } } static int g_raid_tr_event_raid1(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd, u_int event) { g_raid_tr_update_state_raid1(tr->tro_volume, sd); return (0); } static int g_raid_tr_start_raid1(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1_object *trs; struct g_raid_volume *vol; trs = (struct g_raid_tr_raid1_object *)tr; vol = tr->tro_volume; trs->trso_starting = 0; g_raid_tr_update_state_raid1(vol, NULL); return (0); } static int g_raid_tr_stop_raid1(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1_object *trs; struct g_raid_volume *vol; trs = (struct g_raid_tr_raid1_object *)tr; vol = tr->tro_volume; trs->trso_starting = 0; trs->trso_stopping = 1; g_raid_tr_update_state_raid1(vol, NULL); return (0); } /* * Select the disk to read from. Take into account: subdisk state, running * error recovery, average disk load, head position and possible cache hits. */ #define ABS(x) (((x) >= 0) ? (x) : (-(x))) static struct g_raid_subdisk * g_raid_tr_raid1_select_read_disk(struct g_raid_volume *vol, struct bio *bp, u_int mask) { struct g_raid_subdisk *sd, *best; int i, prio, bestprio; best = NULL; bestprio = INT_MAX; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE && ((sd->sd_state != G_RAID_SUBDISK_S_REBUILD && sd->sd_state != G_RAID_SUBDISK_S_RESYNC) || bp->bio_offset + bp->bio_length > sd->sd_rebuild_pos)) continue; if ((mask & (1 << i)) != 0) continue; prio = G_RAID_SUBDISK_LOAD(sd); prio += min(sd->sd_recovery, 255) << 22; prio += (G_RAID_SUBDISK_S_ACTIVE - sd->sd_state) << 16; /* If disk head is precisely in position - highly prefer it. */ if (G_RAID_SUBDISK_POS(sd) == bp->bio_offset) prio -= 2 * G_RAID_SUBDISK_LOAD_SCALE; else /* If disk head is close to position - prefer it. */ if (ABS(G_RAID_SUBDISK_POS(sd) - bp->bio_offset) < G_RAID_SUBDISK_TRACK_SIZE) prio -= 1 * G_RAID_SUBDISK_LOAD_SCALE; if (prio < bestprio) { best = sd; bestprio = prio; } } return (best); } static void g_raid_tr_iostart_raid1_read(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_subdisk *sd; struct bio *cbp; sd = g_raid_tr_raid1_select_read_disk(tr->tro_volume, bp, 0); KASSERT(sd != NULL, ("No active disks in volume %s.", tr->tro_volume->v_name)); cbp = g_clone_bio(bp); if (cbp == NULL) { g_raid_iodone(bp, ENOMEM); return; } g_raid_subdisk_iostart(sd, cbp); } static void g_raid_tr_iostart_raid1_write(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct bio_queue_head queue; struct bio *cbp; int i; vol = tr->tro_volume; /* * Allocate all bios before sending any request, so we can return * ENOMEM in nice and clean way. 
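/*
 * Aside: a minimal sketch (not part of these sources) of the priority
 * formula used by g_raid_tr_raid1_select_read_disk() above.  Lower is
 * better: start from the subdisk's current load, penalize ongoing
 * recovery and non-ACTIVE state, and reward a disk whose head is at or
 * near the requested offset.
 */
static int
raid1_read_prio(struct g_raid_subdisk *sd, off_t bio_offset)
{
	int prio;

	prio = G_RAID_SUBDISK_LOAD(sd);
	prio += min(sd->sd_recovery, 255) << 22;
	prio += (G_RAID_SUBDISK_S_ACTIVE - sd->sd_state) << 16;
	if (G_RAID_SUBDISK_POS(sd) == bio_offset)
		prio -= 2 * G_RAID_SUBDISK_LOAD_SCALE;
	else if (ABS(G_RAID_SUBDISK_POS(sd) - bio_offset) <
	    G_RAID_SUBDISK_TRACK_SIZE)
		prio -= 1 * G_RAID_SUBDISK_LOAD_SCALE;
	return (prio);
}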
*/ bioq_init(&queue); for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; switch (sd->sd_state) { case G_RAID_SUBDISK_S_ACTIVE: break; case G_RAID_SUBDISK_S_REBUILD: /* * When rebuilding, only part of this subdisk is * writable, the rest will be written as part of the * that process. */ if (bp->bio_offset >= sd->sd_rebuild_pos) continue; break; case G_RAID_SUBDISK_S_STALE: case G_RAID_SUBDISK_S_RESYNC: /* * Resyncing still writes on the theory that the * resync'd disk is very close and writing it will * keep it that way better if we keep up while * resyncing. */ break; default: continue; } cbp = g_clone_bio(bp); if (cbp == NULL) goto failure; cbp->bio_caller1 = sd; bioq_insert_tail(&queue, cbp); } while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); } static void g_raid_tr_iostart_raid1(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_volume *vol; struct g_raid_tr_raid1_object *trs; vol = tr->tro_volume; trs = (struct g_raid_tr_raid1_object *)tr; if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL && vol->v_state != G_RAID_VOLUME_S_DEGRADED) { g_raid_iodone(bp, EIO); return; } /* * If we're rebuilding, squeeze in rebuild activity every so often, * even when the disk is busy. Be sure to only count real I/O * to the disk. All 'SPECIAL' I/O is traffic generated to the disk * by this module. */ if (trs->trso_failed_sd != NULL && !(bp->bio_cflags & G_RAID_BIO_FLAG_SPECIAL)) { /* Make this new or running now round short. */ trs->trso_recover_slabs = 0; if (--trs->trso_fair_io <= 0) { trs->trso_fair_io = g_raid1_rebuild_fair_io; g_raid_tr_raid1_rebuild_some(tr); } } switch (bp->bio_cmd) { case BIO_READ: g_raid_tr_iostart_raid1_read(tr, bp); break; case BIO_WRITE: case BIO_DELETE: g_raid_tr_iostart_raid1_write(tr, bp); break; case BIO_FLUSH: g_raid_tr_flush_common(tr, bp); break; default: KASSERT(1 == 0, ("Invalid command here: %u (volume=%s)", bp->bio_cmd, vol->v_name)); break; } } static void g_raid_tr_iodone_raid1(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd, struct bio *bp) { struct bio *cbp; struct g_raid_subdisk *nsd; struct g_raid_volume *vol; struct bio *pbp; struct g_raid_tr_raid1_object *trs; uintptr_t *mask; int error, do_write; trs = (struct g_raid_tr_raid1_object *)tr; vol = tr->tro_volume; if (bp->bio_cflags & G_RAID_BIO_FLAG_SYNC) { /* * This operation is part of a rebuild or resync operation. * See what work just got done, then schedule the next bit of * work, if any. Rebuild/resync is done a little bit at a * time. Either when a timeout happens, or after we get a * bunch of I/Os to the disk (to make sure an active system * will complete in a sane amount of time). * * We are setup to do differing amounts of work for each of * these cases. so long as the slabs is smallish (less than * 50 or so, I'd guess, but that's just a WAG), we shouldn't * have any bio starvation issues. For active disks, we do * 5MB of data, for inactive ones, we do 50MB. */ if (trs->trso_type == TR_RAID1_REBUILD) { if (bp->bio_cmd == BIO_READ) { /* Immediately abort rebuild, if requested. */ if (trs->trso_flags & TR_RAID1_F_ABORT) { trs->trso_flags &= ~TR_RAID1_F_DOING_SOME; g_raid_tr_raid1_rebuild_abort(tr); return; } /* On read error, skip and cross fingers. 
*/ if (bp->bio_error != 0) { G_RAID_LOGREQ(0, bp, "Read error during rebuild (%d), " "possible data loss!", bp->bio_error); goto rebuild_round_done; } /* * The read operation finished, queue the * write and get out. */ G_RAID_LOGREQ(4, bp, "rebuild read done. %d", bp->bio_error); bp->bio_cmd = BIO_WRITE; bp->bio_cflags = G_RAID_BIO_FLAG_SYNC; G_RAID_LOGREQ(4, bp, "Queueing rebuild write."); g_raid_subdisk_iostart(trs->trso_failed_sd, bp); } else { /* * The write operation just finished. Do * another. We keep cloning the master bio * since it has the right buffers allocated to * it. */ G_RAID_LOGREQ(4, bp, "rebuild write done. Error %d", bp->bio_error); nsd = trs->trso_failed_sd; if (bp->bio_error != 0 || trs->trso_flags & TR_RAID1_F_ABORT) { if ((trs->trso_flags & TR_RAID1_F_ABORT) == 0) { g_raid_tr_raid1_fail_disk(sd->sd_softc, nsd, nsd->sd_disk); } trs->trso_flags &= ~TR_RAID1_F_DOING_SOME; g_raid_tr_raid1_rebuild_abort(tr); return; } rebuild_round_done: nsd = trs->trso_failed_sd; trs->trso_flags &= ~TR_RAID1_F_LOCKED; g_raid_unlock_range(sd->sd_volume, bp->bio_offset, bp->bio_length); nsd->sd_rebuild_pos += bp->bio_length; if (nsd->sd_rebuild_pos >= nsd->sd_size) { g_raid_tr_raid1_rebuild_finish(tr); return; } /* Abort rebuild if we are stopping */ if (trs->trso_stopping) { trs->trso_flags &= ~TR_RAID1_F_DOING_SOME; g_raid_tr_raid1_rebuild_abort(tr); return; } if (--trs->trso_meta_update <= 0) { g_raid_write_metadata(vol->v_softc, vol, nsd, nsd->sd_disk); trs->trso_meta_update = g_raid1_rebuild_meta_update; } trs->trso_flags &= ~TR_RAID1_F_DOING_SOME; if (--trs->trso_recover_slabs <= 0) return; g_raid_tr_raid1_rebuild_some(tr); } } else if (trs->trso_type == TR_RAID1_RESYNC) { /* * read good sd, read bad sd in parallel. when both * done, compare the buffers. write good to the bad * if different. do the next bit of work. */ panic("Somehow, we think we're doing a resync"); } return; } pbp = bp->bio_parent; pbp->bio_inbed++; if (bp->bio_cmd == BIO_READ && bp->bio_error != 0) { /* * Read failed on first drive. Retry the read error on * another disk drive, if available, before erroring out the * read. */ sd->sd_disk->d_read_errs++; G_RAID_LOGREQ(0, bp, "Read error (%d), %d read errors total", bp->bio_error, sd->sd_disk->d_read_errs); /* * If there are too many read errors, we move to degraded. * XXX Do we want to FAIL the drive (eg, make the user redo * everything to get it back in sync), or just degrade the * drive, which kicks off a resync? */ do_write = 1; if (sd->sd_disk->d_read_errs > g_raid_read_err_thresh) { g_raid_tr_raid1_fail_disk(sd->sd_softc, sd, sd->sd_disk); if (pbp->bio_children == 1) do_write = 0; } /* * Find the other disk, and try to do the I/O to it. */ mask = (uintptr_t *)(&pbp->bio_driver2); if (pbp->bio_children == 1) { /* Save original subdisk. */ pbp->bio_driver1 = do_write ? sd : NULL; *mask = 0; } *mask |= 1 << sd->sd_pos; nsd = g_raid_tr_raid1_select_read_disk(vol, pbp, *mask); if (nsd != NULL && (cbp = g_clone_bio(pbp)) != NULL) { g_destroy_bio(bp); G_RAID_LOGREQ(2, cbp, "Retrying read from %d", nsd->sd_pos); if (pbp->bio_children == 2 && do_write) { sd->sd_recovery++; cbp->bio_caller1 = nsd; pbp->bio_pflags = G_RAID_BIO_FLAG_LOCKED; /* Lock callback starts I/O */ g_raid_lock_range(sd->sd_volume, cbp->bio_offset, cbp->bio_length, pbp, cbp); } else { g_raid_subdisk_iostart(nsd, cbp); } return; } /* * We can't retry. Return the original error by falling * through. This will happen when there's only one good disk. 
* We don't need to fail the raid, since its actual state is * based on the state of the subdisks. */ G_RAID_LOGREQ(2, bp, "Couldn't retry read, failing it"); } if (bp->bio_cmd == BIO_READ && bp->bio_error == 0 && pbp->bio_children > 1 && pbp->bio_driver1 != NULL) { /* * If it was a read, and bio_children is >1, then we just * recovered the data from the second drive. We should try to * write that data to the first drive if sector remapping is * enabled. A write should put the data in a new place on the * disk, remapping the bad sector. Do we need to do that by * queueing a request to the main worker thread? It doesn't * affect the return code of this current read, and can be * done at our leisure. However, to make the code simpler, it * is done synchronously. */ G_RAID_LOGREQ(3, bp, "Recovered data from other drive"); cbp = g_clone_bio(pbp); if (cbp != NULL) { g_destroy_bio(bp); cbp->bio_cmd = BIO_WRITE; cbp->bio_cflags = G_RAID_BIO_FLAG_REMAP; G_RAID_LOGREQ(2, cbp, "Attempting bad sector remap on failing drive."); g_raid_subdisk_iostart(pbp->bio_driver1, cbp); return; } } if (pbp->bio_pflags & G_RAID_BIO_FLAG_LOCKED) { /* * We're done with a recovery, mark the range as unlocked. * For any write errors, we aggressively fail the disk since * there was both a READ and a WRITE error at this location. * Both types of errors generally indicates the drive is on * the verge of total failure anyway. Better to stop trusting * it now. However, we need to reset error to 0 in that case * because we're not failing the original I/O which succeeded. */ if (bp->bio_cmd == BIO_WRITE && bp->bio_error) { G_RAID_LOGREQ(0, bp, "Remap write failed: " "failing subdisk."); g_raid_tr_raid1_fail_disk(sd->sd_softc, sd, sd->sd_disk); bp->bio_error = 0; } if (pbp->bio_driver1 != NULL) { ((struct g_raid_subdisk *)pbp->bio_driver1) ->sd_recovery--; } G_RAID_LOGREQ(2, bp, "REMAP done %d.", bp->bio_error); g_raid_unlock_range(sd->sd_volume, bp->bio_offset, bp->bio_length); } if (pbp->bio_cmd != BIO_READ) { if (pbp->bio_inbed == 1 || pbp->bio_error != 0) pbp->bio_error = bp->bio_error; if (pbp->bio_cmd == BIO_WRITE && bp->bio_error != 0) { G_RAID_LOGREQ(0, bp, "Write failed: failing subdisk."); g_raid_tr_raid1_fail_disk(sd->sd_softc, sd, sd->sd_disk); } error = pbp->bio_error; } else error = bp->bio_error; g_destroy_bio(bp); if (pbp->bio_children == pbp->bio_inbed) { pbp->bio_completed = pbp->bio_length; g_raid_iodone(pbp, error); } } static int g_raid_tr_kerneldump_raid1(struct g_raid_tr_object *tr, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; int error, i, ok; vol = tr->tro_volume; error = 0; ok = 0; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; switch (sd->sd_state) { case G_RAID_SUBDISK_S_ACTIVE: break; case G_RAID_SUBDISK_S_REBUILD: /* * When rebuilding, only part of this subdisk is * writable, the rest will be written as part of the * that process. */ if (offset >= sd->sd_rebuild_pos) continue; break; case G_RAID_SUBDISK_S_STALE: case G_RAID_SUBDISK_S_RESYNC: /* * Resyncing still writes on the theory that the * resync'd disk is very close and writing it will * keep it that way better if we keep up while * resyncing. */ break; default: continue; } error = g_raid_subdisk_kerneldump(sd, virtual, physical, offset, length); if (error == 0) ok++; } return (ok > 0 ? 
0 : error); } static int g_raid_tr_locked_raid1(struct g_raid_tr_object *tr, void *argp) { struct bio *bp; struct g_raid_subdisk *sd; bp = (struct bio *)argp; sd = (struct g_raid_subdisk *)bp->bio_caller1; g_raid_subdisk_iostart(sd, bp); return (0); } static int g_raid_tr_idle_raid1(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1_object *trs; trs = (struct g_raid_tr_raid1_object *)tr; trs->trso_fair_io = g_raid1_rebuild_fair_io; trs->trso_recover_slabs = g_raid1_rebuild_cluster_idle; if (trs->trso_type == TR_RAID1_REBUILD) g_raid_tr_raid1_rebuild_some(tr); return (0); } static int g_raid_tr_free_raid1(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1_object *trs; trs = (struct g_raid_tr_raid1_object *)tr; if (trs->trso_buffer != NULL) { free(trs->trso_buffer, M_TR_RAID1); trs->trso_buffer = NULL; } return (0); } G_RAID_TR_DECLARE(raid1, "RAID1"); Index: head/sys/geom/raid/tr_raid1e.c =================================================================== --- head/sys/geom/raid/tr_raid1e.c (revision 350693) +++ head/sys/geom/raid/tr_raid1e.c (revision 350694) @@ -1,1244 +1,1245 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "geom/raid/g_raid.h" #include "g_raid_tr_if.h" #define N 2 SYSCTL_DECL(_kern_geom_raid_raid1e); #define RAID1E_REBUILD_SLAB (1 << 20) /* One transation in a rebuild */ static int g_raid1e_rebuild_slab = RAID1E_REBUILD_SLAB; SYSCTL_UINT(_kern_geom_raid_raid1e, OID_AUTO, rebuild_slab_size, CTLFLAG_RWTUN, &g_raid1e_rebuild_slab, 0, "Amount of the disk to rebuild each read/write cycle of the rebuild."); #define RAID1E_REBUILD_FAIR_IO 20 /* use 1/x of the available I/O */ static int g_raid1e_rebuild_fair_io = RAID1E_REBUILD_FAIR_IO; SYSCTL_UINT(_kern_geom_raid_raid1e, OID_AUTO, rebuild_fair_io, CTLFLAG_RWTUN, &g_raid1e_rebuild_fair_io, 0, "Fraction of the I/O bandwidth to use when disk busy for rebuild."); #define RAID1E_REBUILD_CLUSTER_IDLE 100 static int g_raid1e_rebuild_cluster_idle = RAID1E_REBUILD_CLUSTER_IDLE; SYSCTL_UINT(_kern_geom_raid_raid1e, OID_AUTO, rebuild_cluster_idle, CTLFLAG_RWTUN, &g_raid1e_rebuild_cluster_idle, 0, "Number of slabs to do each time we trigger a rebuild cycle"); #define RAID1E_REBUILD_META_UPDATE 1024 /* update meta data every 1GB or so */ static int g_raid1e_rebuild_meta_update = RAID1E_REBUILD_META_UPDATE; SYSCTL_UINT(_kern_geom_raid_raid1e, OID_AUTO, rebuild_meta_update, CTLFLAG_RWTUN, &g_raid1e_rebuild_meta_update, 0, "When to update the meta data."); static MALLOC_DEFINE(M_TR_RAID1E, "tr_raid1e_data", "GEOM_RAID RAID1E data"); #define TR_RAID1E_NONE 0 #define TR_RAID1E_REBUILD 1 #define TR_RAID1E_RESYNC 2 #define TR_RAID1E_F_DOING_SOME 0x1 #define TR_RAID1E_F_LOCKED 0x2 #define TR_RAID1E_F_ABORT 0x4 struct g_raid_tr_raid1e_object { struct g_raid_tr_object trso_base; int trso_starting; int trso_stopping; int trso_type; int trso_recover_slabs; /* slabs before rest */ int trso_fair_io; int trso_meta_update; int trso_flags; struct g_raid_subdisk *trso_failed_sd; /* like per volume */ void *trso_buffer; /* Buffer space */ off_t trso_lock_pos; /* Locked range start. */ off_t trso_lock_len; /* Locked range length. 
*/ struct bio trso_bio; }; static g_raid_tr_taste_t g_raid_tr_taste_raid1e; static g_raid_tr_event_t g_raid_tr_event_raid1e; static g_raid_tr_start_t g_raid_tr_start_raid1e; static g_raid_tr_stop_t g_raid_tr_stop_raid1e; static g_raid_tr_iostart_t g_raid_tr_iostart_raid1e; static g_raid_tr_iodone_t g_raid_tr_iodone_raid1e; static g_raid_tr_kerneldump_t g_raid_tr_kerneldump_raid1e; static g_raid_tr_locked_t g_raid_tr_locked_raid1e; static g_raid_tr_idle_t g_raid_tr_idle_raid1e; static g_raid_tr_free_t g_raid_tr_free_raid1e; static kobj_method_t g_raid_tr_raid1e_methods[] = { KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_raid1e), KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_raid1e), KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_raid1e), KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_raid1e), KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_raid1e), KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_raid1e), KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_raid1e), KOBJMETHOD(g_raid_tr_locked, g_raid_tr_locked_raid1e), KOBJMETHOD(g_raid_tr_idle, g_raid_tr_idle_raid1e), KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_raid1e), { 0, 0 } }; static struct g_raid_tr_class g_raid_tr_raid1e_class = { "RAID1E", g_raid_tr_raid1e_methods, sizeof(struct g_raid_tr_raid1e_object), .trc_enable = 1, .trc_priority = 200, .trc_accept_unmapped = 1 }; static void g_raid_tr_raid1e_rebuild_abort(struct g_raid_tr_object *tr); static void g_raid_tr_raid1e_maybe_rebuild(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd); static int g_raid_tr_raid1e_select_read_disk(struct g_raid_volume *vol, int no, off_t off, off_t len, u_int mask); static inline void V2P(struct g_raid_volume *vol, off_t virt, int *disk, off_t *offset, off_t *start) { off_t nstrip; u_int strip_size; strip_size = vol->v_strip_size; /* Strip number. */ nstrip = virt / strip_size; /* Start position in strip. */ *start = virt % strip_size; /* Disk number. */ *disk = (nstrip * N) % vol->v_disks_count; /* Strip start position in disk. */ *offset = ((nstrip * N) / vol->v_disks_count) * strip_size; } static inline void P2V(struct g_raid_volume *vol, int disk, off_t offset, off_t *virt, int *copy) { off_t nstrip, start; u_int strip_size; strip_size = vol->v_strip_size; /* Start position in strip. */ start = offset % strip_size; /* Physical strip number. */ nstrip = (offset / strip_size) * vol->v_disks_count + disk; /* Number of physical strip (copy) inside virtual strip. */ *copy = nstrip % N; /* Offset in virtual space. 
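/*
 * Aside: a worked example of the V2P()/P2V() mapping defined below
 * (assumed layout: RAID1E with N = 2 copies, 3 disks, 64K strip).
 * For virtual offset 196608 + 4096 = 200704: nstrip = 3, start = 4096,
 * so V2P() gives disk = (3 * 2) % 3 = 0 and
 * offset = ((3 * 2) / 3) * 65536 = 131072; the second copy of the same
 * strip lands on disk 1 at the same offset.  P2V(disk 0, 131072 + 4096)
 * inverts this: physical strip (131072 / 65536) * 3 + 0 = 6,
 * copy = 6 % 2 = 0, virtual offset (6 / 2) * 65536 + 4096 = 200704.
 */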
*/ *virt = (nstrip / N) * strip_size + start; } static int g_raid_tr_taste_raid1e(struct g_raid_tr_object *tr, struct g_raid_volume *vol) { struct g_raid_tr_raid1e_object *trs; trs = (struct g_raid_tr_raid1e_object *)tr; if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_RAID1E || tr->tro_volume->v_raid_level_qualifier != G_RAID_VOLUME_RLQ_R1EA) return (G_RAID_TR_TASTE_FAIL); trs->trso_starting = 1; return (G_RAID_TR_TASTE_SUCCEED); } static int g_raid_tr_update_state_raid1e_even(struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *bestsd, *worstsd; int i, j, state, sstate; sc = vol->v_softc; state = G_RAID_VOLUME_S_OPTIMAL; for (i = 0; i < vol->v_disks_count / N; i++) { bestsd = &vol->v_subdisks[i * N]; for (j = 1; j < N; j++) { sd = &vol->v_subdisks[i * N + j]; if (sd->sd_state > bestsd->sd_state) bestsd = sd; else if (sd->sd_state == bestsd->sd_state && (sd->sd_state == G_RAID_SUBDISK_S_REBUILD || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) && sd->sd_rebuild_pos > bestsd->sd_rebuild_pos) bestsd = sd; } if (bestsd->sd_state >= G_RAID_SUBDISK_S_UNINITIALIZED && bestsd->sd_state != G_RAID_SUBDISK_S_ACTIVE) { /* We found reasonable candidate. */ G_RAID_DEBUG1(1, sc, "Promote subdisk %s:%d from %s to ACTIVE.", vol->v_name, bestsd->sd_pos, g_raid_subdisk_state2str(bestsd->sd_state)); g_raid_change_subdisk_state(bestsd, G_RAID_SUBDISK_S_ACTIVE); g_raid_write_metadata(sc, vol, bestsd, bestsd->sd_disk); } worstsd = &vol->v_subdisks[i * N]; for (j = 1; j < N; j++) { sd = &vol->v_subdisks[i * N + j]; if (sd->sd_state < worstsd->sd_state) worstsd = sd; } if (worstsd->sd_state == G_RAID_SUBDISK_S_ACTIVE) sstate = G_RAID_VOLUME_S_OPTIMAL; else if (worstsd->sd_state >= G_RAID_SUBDISK_S_STALE) sstate = G_RAID_VOLUME_S_SUBOPTIMAL; else if (bestsd->sd_state == G_RAID_SUBDISK_S_ACTIVE) sstate = G_RAID_VOLUME_S_DEGRADED; else sstate = G_RAID_VOLUME_S_BROKEN; if (sstate < state) state = sstate; } return (state); } static int g_raid_tr_update_state_raid1e_odd(struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *bestsd, *worstsd; int i, j, state, sstate; sc = vol->v_softc; if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE) == vol->v_disks_count) return (G_RAID_VOLUME_S_OPTIMAL); for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state == G_RAID_SUBDISK_S_UNINITIALIZED) { /* We found reasonable candidate. 
*/ G_RAID_DEBUG1(1, sc, "Promote subdisk %s:%d from %s to STALE.", vol->v_name, sd->sd_pos, g_raid_subdisk_state2str(sd->sd_state)); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_STALE); g_raid_write_metadata(sc, vol, sd, sd->sd_disk); } } state = G_RAID_VOLUME_S_OPTIMAL; for (i = 0; i < vol->v_disks_count; i++) { bestsd = &vol->v_subdisks[i]; worstsd = &vol->v_subdisks[i]; for (j = 1; j < N; j++) { sd = &vol->v_subdisks[(i + j) % vol->v_disks_count]; if (sd->sd_state > bestsd->sd_state) bestsd = sd; else if (sd->sd_state == bestsd->sd_state && (sd->sd_state == G_RAID_SUBDISK_S_REBUILD || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) && sd->sd_rebuild_pos > bestsd->sd_rebuild_pos) bestsd = sd; if (sd->sd_state < worstsd->sd_state) worstsd = sd; } if (worstsd->sd_state == G_RAID_SUBDISK_S_ACTIVE) sstate = G_RAID_VOLUME_S_OPTIMAL; else if (worstsd->sd_state >= G_RAID_SUBDISK_S_STALE) sstate = G_RAID_VOLUME_S_SUBOPTIMAL; else if (bestsd->sd_state >= G_RAID_SUBDISK_S_STALE) sstate = G_RAID_VOLUME_S_DEGRADED; else sstate = G_RAID_VOLUME_S_BROKEN; if (sstate < state) state = sstate; } return (state); } static int g_raid_tr_update_state_raid1e(struct g_raid_volume *vol, struct g_raid_subdisk *sd) { struct g_raid_tr_raid1e_object *trs; struct g_raid_softc *sc; u_int s; sc = vol->v_softc; trs = (struct g_raid_tr_raid1e_object *)vol->v_tr; if (trs->trso_stopping && (trs->trso_flags & TR_RAID1E_F_DOING_SOME) == 0) s = G_RAID_VOLUME_S_STOPPED; else if (trs->trso_starting) s = G_RAID_VOLUME_S_STARTING; else { if ((vol->v_disks_count % N) == 0) s = g_raid_tr_update_state_raid1e_even(vol); else s = g_raid_tr_update_state_raid1e_odd(vol); } if (s != vol->v_state) { g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ? G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN, G_RAID_EVENT_VOLUME); g_raid_change_volume_state(vol, s); if (!trs->trso_starting && !trs->trso_stopping) g_raid_write_metadata(sc, vol, NULL, NULL); } if (!trs->trso_starting && !trs->trso_stopping) g_raid_tr_raid1e_maybe_rebuild(vol->v_tr, sd); return (0); } static void g_raid_tr_raid1e_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd, struct g_raid_disk *disk) { struct g_raid_volume *vol; vol = sd->sd_volume; /* * We don't fail the last disk in the pack, since it still has decent * data on it and that's better than failing the disk if it is the root * file system. * * XXX should this be controlled via a tunable? It makes sense for * the volume that has / on it. I can't think of a case where we'd * want the volume to go away on this kind of event. 
*/ if ((g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED) < vol->v_disks_count) && (sd->sd_state >= G_RAID_SUBDISK_S_UNINITIALIZED)) return; g_raid_fail_disk(sc, sd, disk); } static void g_raid_tr_raid1e_rebuild_done(struct g_raid_tr_raid1e_object *trs) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; vol = trs->trso_base.tro_volume; sd = trs->trso_failed_sd; g_raid_write_metadata(vol->v_softc, vol, sd, sd->sd_disk); free(trs->trso_buffer, M_TR_RAID1E); trs->trso_buffer = NULL; trs->trso_flags &= ~TR_RAID1E_F_DOING_SOME; trs->trso_type = TR_RAID1E_NONE; trs->trso_recover_slabs = 0; trs->trso_failed_sd = NULL; g_raid_tr_update_state_raid1e(vol, NULL); } static void g_raid_tr_raid1e_rebuild_finish(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1e_object *trs; struct g_raid_subdisk *sd; trs = (struct g_raid_tr_raid1e_object *)tr; sd = trs->trso_failed_sd; G_RAID_DEBUG1(0, tr->tro_volume->v_softc, "Subdisk %s:%d-%s rebuild completed.", sd->sd_volume->v_name, sd->sd_pos, sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]"); g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE); sd->sd_rebuild_pos = 0; g_raid_tr_raid1e_rebuild_done(trs); } static void g_raid_tr_raid1e_rebuild_abort(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1e_object *trs; struct g_raid_subdisk *sd; struct g_raid_volume *vol; vol = tr->tro_volume; trs = (struct g_raid_tr_raid1e_object *)tr; sd = trs->trso_failed_sd; if (trs->trso_flags & TR_RAID1E_F_DOING_SOME) { G_RAID_DEBUG1(1, vol->v_softc, "Subdisk %s:%d-%s rebuild is aborting.", sd->sd_volume->v_name, sd->sd_pos, sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]"); trs->trso_flags |= TR_RAID1E_F_ABORT; } else { G_RAID_DEBUG1(0, vol->v_softc, "Subdisk %s:%d-%s rebuild aborted.", sd->sd_volume->v_name, sd->sd_pos, sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]"); trs->trso_flags &= ~TR_RAID1E_F_ABORT; if (trs->trso_flags & TR_RAID1E_F_LOCKED) { trs->trso_flags &= ~TR_RAID1E_F_LOCKED; g_raid_unlock_range(tr->tro_volume, trs->trso_lock_pos, trs->trso_lock_len); } g_raid_tr_raid1e_rebuild_done(trs); } } static void g_raid_tr_raid1e_rebuild_some(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1e_object *trs; struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct bio *bp; off_t len, virtual, vend, offset, start; int disk, copy, best; trs = (struct g_raid_tr_raid1e_object *)tr; if (trs->trso_flags & TR_RAID1E_F_DOING_SOME) return; vol = tr->tro_volume; sc = vol->v_softc; sd = trs->trso_failed_sd; while (1) { if (sd->sd_rebuild_pos >= sd->sd_size) { g_raid_tr_raid1e_rebuild_finish(tr); return; } /* Get virtual offset from physical rebuild position. */ P2V(vol, sd->sd_pos, sd->sd_rebuild_pos, &virtual, ©); /* Get physical offset back to get first stripe position. */ V2P(vol, virtual, &disk, &offset, &start); /* Calculate contignous data length. */ len = MIN(g_raid1e_rebuild_slab, sd->sd_size - sd->sd_rebuild_pos); if ((vol->v_disks_count % N) != 0) len = MIN(len, vol->v_strip_size - start); /* Find disk with most accurate data. */ best = g_raid_tr_raid1e_select_read_disk(vol, disk, offset + start, len, 0); if (best < 0) { /* There is no any valid disk. */ g_raid_tr_raid1e_rebuild_abort(tr); return; } else if (best != copy) { /* Some other disk has better data. */ break; } /* We have the most accurate data. Skip the range. 
*/ G_RAID_DEBUG1(3, sc, "Skipping rebuild for range %ju - %ju", sd->sd_rebuild_pos, sd->sd_rebuild_pos + len); sd->sd_rebuild_pos += len; } bp = &trs->trso_bio; memset(bp, 0, sizeof(*bp)); bp->bio_offset = offset + start + ((disk + best >= vol->v_disks_count) ? vol->v_strip_size : 0); bp->bio_length = len; bp->bio_data = trs->trso_buffer; bp->bio_cmd = BIO_READ; bp->bio_cflags = G_RAID_BIO_FLAG_SYNC; bp->bio_caller1 = &vol->v_subdisks[(disk + best) % vol->v_disks_count]; G_RAID_LOGREQ(3, bp, "Queueing rebuild read"); /* * If we are crossing stripe boundary, correct affected virtual * range we should lock. */ if (start + len > vol->v_strip_size) { P2V(vol, sd->sd_pos, sd->sd_rebuild_pos + len, &vend, ©); len = vend - virtual; } trs->trso_flags |= TR_RAID1E_F_DOING_SOME; trs->trso_flags |= TR_RAID1E_F_LOCKED; trs->trso_lock_pos = virtual; trs->trso_lock_len = len; /* Lock callback starts I/O */ g_raid_lock_range(sd->sd_volume, virtual, len, NULL, bp); } static void g_raid_tr_raid1e_rebuild_start(struct g_raid_tr_object *tr) { struct g_raid_volume *vol; struct g_raid_tr_raid1e_object *trs; struct g_raid_subdisk *sd; vol = tr->tro_volume; trs = (struct g_raid_tr_raid1e_object *)tr; if (trs->trso_failed_sd) { G_RAID_DEBUG1(1, vol->v_softc, "Already rebuild in start rebuild. pos %jd\n", (intmax_t)trs->trso_failed_sd->sd_rebuild_pos); return; } sd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_RESYNC); if (sd == NULL) sd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_REBUILD); if (sd == NULL) { sd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_STALE); if (sd != NULL) { sd->sd_rebuild_pos = 0; g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_RESYNC); g_raid_write_metadata(vol->v_softc, vol, sd, NULL); } else { sd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_UNINITIALIZED); if (sd == NULL) sd = g_raid_get_subdisk(vol, G_RAID_SUBDISK_S_NEW); if (sd != NULL) { sd->sd_rebuild_pos = 0; g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_REBUILD); g_raid_write_metadata(vol->v_softc, vol, sd, NULL); } } } if (sd == NULL) { G_RAID_DEBUG1(1, vol->v_softc, "No failed disk to rebuild. night night."); return; } trs->trso_failed_sd = sd; G_RAID_DEBUG1(0, vol->v_softc, "Subdisk %s:%d-%s rebuild start at %jd.", sd->sd_volume->v_name, sd->sd_pos, sd->sd_disk ? 
g_raid_get_diskname(sd->sd_disk) : "[none]", trs->trso_failed_sd->sd_rebuild_pos); trs->trso_type = TR_RAID1E_REBUILD; trs->trso_buffer = malloc(g_raid1e_rebuild_slab, M_TR_RAID1E, M_WAITOK); trs->trso_meta_update = g_raid1e_rebuild_meta_update; g_raid_tr_raid1e_rebuild_some(tr); } static void g_raid_tr_raid1e_maybe_rebuild(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd) { struct g_raid_volume *vol; struct g_raid_tr_raid1e_object *trs; int nr; vol = tr->tro_volume; trs = (struct g_raid_tr_raid1e_object *)tr; if (trs->trso_stopping) return; nr = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_REBUILD) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC); switch(trs->trso_type) { case TR_RAID1E_NONE: if (vol->v_state < G_RAID_VOLUME_S_DEGRADED) return; if (nr == 0) { nr = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_NEW) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED); if (nr == 0) return; } g_raid_tr_raid1e_rebuild_start(tr); break; case TR_RAID1E_REBUILD: if (vol->v_state < G_RAID_VOLUME_S_DEGRADED || nr == 0 || trs->trso_failed_sd == sd) g_raid_tr_raid1e_rebuild_abort(tr); break; case TR_RAID1E_RESYNC: break; } } static int g_raid_tr_event_raid1e(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd, u_int event) { g_raid_tr_update_state_raid1e(tr->tro_volume, sd); return (0); } static int g_raid_tr_start_raid1e(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1e_object *trs; struct g_raid_volume *vol; trs = (struct g_raid_tr_raid1e_object *)tr; vol = tr->tro_volume; trs->trso_starting = 0; g_raid_tr_update_state_raid1e(vol, NULL); return (0); } static int g_raid_tr_stop_raid1e(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1e_object *trs; struct g_raid_volume *vol; trs = (struct g_raid_tr_raid1e_object *)tr; vol = tr->tro_volume; trs->trso_starting = 0; trs->trso_stopping = 1; g_raid_tr_update_state_raid1e(vol, NULL); return (0); } /* * Select the disk to read from. Take into account: subdisk state, running * error recovery, average disk load, head position and possible cache hits. */ #define ABS(x) (((x) >= 0) ? (x) : (-(x))) static int g_raid_tr_raid1e_select_read_disk(struct g_raid_volume *vol, int no, off_t off, off_t len, u_int mask) { struct g_raid_subdisk *sd; off_t offset; int i, best, prio, bestprio; best = -1; bestprio = INT_MAX; for (i = 0; i < N; i++) { sd = &vol->v_subdisks[(no + i) % vol->v_disks_count]; offset = off; if (no + i >= vol->v_disks_count) offset += vol->v_strip_size; prio = G_RAID_SUBDISK_LOAD(sd); if ((mask & (1 << sd->sd_pos)) != 0) continue; switch (sd->sd_state) { case G_RAID_SUBDISK_S_ACTIVE: break; case G_RAID_SUBDISK_S_RESYNC: if (offset + off < sd->sd_rebuild_pos) break; /* FALLTHROUGH */ case G_RAID_SUBDISK_S_STALE: prio += i << 24; break; case G_RAID_SUBDISK_S_REBUILD: if (offset + off < sd->sd_rebuild_pos) break; /* FALLTHROUGH */ default: continue; } prio += min(sd->sd_recovery, 255) << 16; /* If disk head is precisely in position - highly prefer it. */ if (G_RAID_SUBDISK_POS(sd) == offset) prio -= 2 * G_RAID_SUBDISK_LOAD_SCALE; else /* If disk head is close to position - prefer it. 
*/ if (ABS(G_RAID_SUBDISK_POS(sd) - offset) < G_RAID_SUBDISK_TRACK_SIZE) prio -= 1 * G_RAID_SUBDISK_LOAD_SCALE; if (prio < bestprio) { bestprio = prio; best = i; } } return (best); } static void g_raid_tr_iostart_raid1e_read(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct bio_queue_head queue; struct bio *cbp; char *addr; off_t offset, start, length, remain; u_int no, strip_size; int best; vol = tr->tro_volume; if ((bp->bio_flags & BIO_UNMAPPED) != 0) addr = NULL; else addr = bp->bio_data; strip_size = vol->v_strip_size; V2P(vol, bp->bio_offset, &no, &offset, &start); remain = bp->bio_length; bioq_init(&queue); while (remain > 0) { length = MIN(strip_size - start, remain); best = g_raid_tr_raid1e_select_read_disk(vol, no, offset, length, 0); KASSERT(best >= 0, ("No readable disk in volume %s!", vol->v_name)); no += best; if (no >= vol->v_disks_count) { no -= vol->v_disks_count; offset += strip_size; } cbp = g_clone_bio(bp); if (cbp == NULL) goto failure; cbp->bio_offset = offset + start; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller1 = &vol->v_subdisks[no]; bioq_insert_tail(&queue, cbp); no += N - best; if (no >= vol->v_disks_count) { no -= vol->v_disks_count; offset += strip_size; } remain -= length; addr += length; start = 0; } while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); } static void g_raid_tr_iostart_raid1e_write(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct bio_queue_head queue; struct bio *cbp; char *addr; off_t offset, start, length, remain; u_int no, strip_size; int i; vol = tr->tro_volume; if ((bp->bio_flags & BIO_UNMAPPED) != 0) addr = NULL; else addr = bp->bio_data; strip_size = vol->v_strip_size; V2P(vol, bp->bio_offset, &no, &offset, &start); remain = bp->bio_length; bioq_init(&queue); while (remain > 0) { length = MIN(strip_size - start, remain); for (i = 0; i < N; i++) { sd = &vol->v_subdisks[no]; switch (sd->sd_state) { case G_RAID_SUBDISK_S_ACTIVE: case G_RAID_SUBDISK_S_STALE: case G_RAID_SUBDISK_S_RESYNC: break; case G_RAID_SUBDISK_S_REBUILD: if (offset + start >= sd->sd_rebuild_pos) goto nextdisk; break; default: goto nextdisk; } cbp = g_clone_bio(bp); if (cbp == NULL) goto failure; cbp->bio_offset = offset + start; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0 && bp->bio_cmd != BIO_DELETE) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller1 = sd; bioq_insert_tail(&queue, cbp); nextdisk: if (++no >= vol->v_disks_count) { no = 0; offset += strip_size; } } remain -= length; if (bp->bio_cmd != BIO_DELETE) addr += length; start = 0; } while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: while ((cbp = bioq_takefirst(&queue)) != NULL) 
g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); } static void g_raid_tr_iostart_raid1e(struct g_raid_tr_object *tr, struct bio *bp) { struct g_raid_volume *vol; struct g_raid_tr_raid1e_object *trs; vol = tr->tro_volume; trs = (struct g_raid_tr_raid1e_object *)tr; if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL && vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL && vol->v_state != G_RAID_VOLUME_S_DEGRADED) { g_raid_iodone(bp, EIO); return; } /* * If we're rebuilding, squeeze in rebuild activity every so often, * even when the disk is busy. Be sure to only count real I/O * to the disk. All 'SPECIAL' I/O is traffic generated to the disk * by this module. */ if (trs->trso_failed_sd != NULL && !(bp->bio_cflags & G_RAID_BIO_FLAG_SPECIAL)) { /* Make this new or running now round short. */ trs->trso_recover_slabs = 0; if (--trs->trso_fair_io <= 0) { trs->trso_fair_io = g_raid1e_rebuild_fair_io; g_raid_tr_raid1e_rebuild_some(tr); } } switch (bp->bio_cmd) { case BIO_READ: g_raid_tr_iostart_raid1e_read(tr, bp); break; case BIO_WRITE: case BIO_DELETE: g_raid_tr_iostart_raid1e_write(tr, bp); break; case BIO_FLUSH: g_raid_tr_flush_common(tr, bp); break; default: KASSERT(1 == 0, ("Invalid command here: %u (volume=%s)", bp->bio_cmd, vol->v_name)); break; } } static void g_raid_tr_iodone_raid1e(struct g_raid_tr_object *tr, struct g_raid_subdisk *sd, struct bio *bp) { struct bio *cbp; struct g_raid_subdisk *nsd; struct g_raid_volume *vol; struct bio *pbp; struct g_raid_tr_raid1e_object *trs; off_t virtual, offset, start; uintptr_t mask; int error, do_write, copy, disk, best; trs = (struct g_raid_tr_raid1e_object *)tr; vol = tr->tro_volume; if (bp->bio_cflags & G_RAID_BIO_FLAG_SYNC) { if (trs->trso_type == TR_RAID1E_REBUILD) { nsd = trs->trso_failed_sd; if (bp->bio_cmd == BIO_READ) { /* Immediately abort rebuild, if requested. */ if (trs->trso_flags & TR_RAID1E_F_ABORT) { trs->trso_flags &= ~TR_RAID1E_F_DOING_SOME; g_raid_tr_raid1e_rebuild_abort(tr); return; } /* On read error, skip and cross fingers. */ if (bp->bio_error != 0) { G_RAID_LOGREQ(0, bp, "Read error during rebuild (%d), " "possible data loss!", bp->bio_error); goto rebuild_round_done; } /* * The read operation finished, queue the * write and get out. */ G_RAID_LOGREQ(3, bp, "Rebuild read done: %d", bp->bio_error); bp->bio_cmd = BIO_WRITE; bp->bio_cflags = G_RAID_BIO_FLAG_SYNC; bp->bio_offset = nsd->sd_rebuild_pos; G_RAID_LOGREQ(3, bp, "Queueing rebuild write."); g_raid_subdisk_iostart(nsd, bp); } else { /* * The write operation just finished. Do * another. We keep cloning the master bio * since it has the right buffers allocated to * it. 
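/*
 * Aside: a minimal sketch (hypothetical helper, not part of these sources)
 * of the fair-I/O throttle used by the iostart routine above: every
 * regular (non-SPECIAL) request cuts the current idle round short and
 * decrements a counter; when it reaches zero, one rebuild cycle is
 * squeezed in and the counter is reset from the rebuild_fair_io tunable.
 */
static void
raid1e_fair_io_tick(struct g_raid_tr_object *tr)
{
	struct g_raid_tr_raid1e_object *trs;

	trs = (struct g_raid_tr_raid1e_object *)tr;
	trs->trso_recover_slabs = 0;	/* Cut the idle round short. */
	if (--trs->trso_fair_io <= 0) {
		trs->trso_fair_io = g_raid1e_rebuild_fair_io;
		g_raid_tr_raid1e_rebuild_some(tr);
	}
}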
*/ G_RAID_LOGREQ(3, bp, "Rebuild write done: %d", bp->bio_error); if (bp->bio_error != 0 || trs->trso_flags & TR_RAID1E_F_ABORT) { if ((trs->trso_flags & TR_RAID1E_F_ABORT) == 0) { g_raid_tr_raid1e_fail_disk(sd->sd_softc, nsd, nsd->sd_disk); } trs->trso_flags &= ~TR_RAID1E_F_DOING_SOME; g_raid_tr_raid1e_rebuild_abort(tr); return; } rebuild_round_done: trs->trso_flags &= ~TR_RAID1E_F_LOCKED; g_raid_unlock_range(tr->tro_volume, trs->trso_lock_pos, trs->trso_lock_len); nsd->sd_rebuild_pos += bp->bio_length; if (nsd->sd_rebuild_pos >= nsd->sd_size) { g_raid_tr_raid1e_rebuild_finish(tr); return; } /* Abort rebuild if we are stopping */ if (trs->trso_stopping) { trs->trso_flags &= ~TR_RAID1E_F_DOING_SOME; g_raid_tr_raid1e_rebuild_abort(tr); return; } if (--trs->trso_meta_update <= 0) { g_raid_write_metadata(vol->v_softc, vol, nsd, nsd->sd_disk); trs->trso_meta_update = g_raid1e_rebuild_meta_update; /* Compensate short rebuild I/Os. */ if ((vol->v_disks_count % N) != 0 && vol->v_strip_size < g_raid1e_rebuild_slab) { trs->trso_meta_update *= g_raid1e_rebuild_slab; trs->trso_meta_update /= vol->v_strip_size; } } trs->trso_flags &= ~TR_RAID1E_F_DOING_SOME; if (--trs->trso_recover_slabs <= 0) return; /* Run next rebuild iteration. */ g_raid_tr_raid1e_rebuild_some(tr); } } else if (trs->trso_type == TR_RAID1E_RESYNC) { /* * read good sd, read bad sd in parallel. when both * done, compare the buffers. write good to the bad * if different. do the next bit of work. */ panic("Somehow, we think we're doing a resync"); } return; } pbp = bp->bio_parent; pbp->bio_inbed++; mask = (intptr_t)bp->bio_caller2; if (bp->bio_cmd == BIO_READ && bp->bio_error != 0) { /* * Read failed on first drive. Retry the read error on * another disk drive, if available, before erroring out the * read. */ sd->sd_disk->d_read_errs++; G_RAID_LOGREQ(0, bp, "Read error (%d), %d read errors total", bp->bio_error, sd->sd_disk->d_read_errs); /* * If there are too many read errors, we move to degraded. * XXX Do we want to FAIL the drive (eg, make the user redo * everything to get it back in sync), or just degrade the * drive, which kicks off a resync? */ do_write = 0; if (sd->sd_disk->d_read_errs > g_raid_read_err_thresh) g_raid_tr_raid1e_fail_disk(sd->sd_softc, sd, sd->sd_disk); else if (mask == 0) do_write = 1; /* Restore what we were doing. */ P2V(vol, sd->sd_pos, bp->bio_offset, &virtual, ©); V2P(vol, virtual, &disk, &offset, &start); /* Find the other disk, and try to do the I/O to it. */ mask |= 1 << copy; best = g_raid_tr_raid1e_select_read_disk(vol, disk, offset, start, mask); if (best >= 0 && (cbp = g_clone_bio(pbp)) != NULL) { disk += best; if (disk >= vol->v_disks_count) { disk -= vol->v_disks_count; offset += vol->v_strip_size; } cbp->bio_offset = offset + start; cbp->bio_length = bp->bio_length; cbp->bio_data = bp->bio_data; cbp->bio_ma = bp->bio_ma; cbp->bio_ma_offset = bp->bio_ma_offset; cbp->bio_ma_n = bp->bio_ma_n; g_destroy_bio(bp); nsd = &vol->v_subdisks[disk]; G_RAID_LOGREQ(2, cbp, "Retrying read from %d", nsd->sd_pos); if (do_write) mask |= 1 << 31; if ((mask & (1U << 31)) != 0) sd->sd_recovery++; cbp->bio_caller2 = (void *)mask; if (do_write) { cbp->bio_caller1 = nsd; /* Lock callback starts I/O */ g_raid_lock_range(sd->sd_volume, virtual, cbp->bio_length, pbp, cbp); } else { g_raid_subdisk_iostart(nsd, cbp); } return; } /* * We can't retry. Return the original error by falling * through. This will happen when there's only one good disk. 
* We don't need to fail the raid, since its actual state is * based on the state of the subdisks. */ G_RAID_LOGREQ(2, bp, "Couldn't retry read, failing it"); } if (bp->bio_cmd == BIO_READ && bp->bio_error == 0 && (mask & (1U << 31)) != 0) { G_RAID_LOGREQ(3, bp, "Recovered data from other drive"); /* Restore what we were doing. */ P2V(vol, sd->sd_pos, bp->bio_offset, &virtual, ©); V2P(vol, virtual, &disk, &offset, &start); /* Find best disk to write. */ best = g_raid_tr_raid1e_select_read_disk(vol, disk, offset, start, ~mask); if (best >= 0 && (cbp = g_clone_bio(pbp)) != NULL) { disk += best; if (disk >= vol->v_disks_count) { disk -= vol->v_disks_count; offset += vol->v_strip_size; } cbp->bio_offset = offset + start; cbp->bio_cmd = BIO_WRITE; cbp->bio_cflags = G_RAID_BIO_FLAG_REMAP; cbp->bio_caller2 = (void *)mask; g_destroy_bio(bp); G_RAID_LOGREQ(2, cbp, "Attempting bad sector remap on failing drive."); g_raid_subdisk_iostart(&vol->v_subdisks[disk], cbp); return; } } if ((mask & (1U << 31)) != 0) { /* * We're done with a recovery, mark the range as unlocked. * For any write errors, we aggressively fail the disk since * there was both a READ and a WRITE error at this location. * Both types of errors generally indicates the drive is on * the verge of total failure anyway. Better to stop trusting * it now. However, we need to reset error to 0 in that case * because we're not failing the original I/O which succeeded. */ /* Restore what we were doing. */ P2V(vol, sd->sd_pos, bp->bio_offset, &virtual, ©); V2P(vol, virtual, &disk, &offset, &start); for (copy = 0; copy < N; copy++) { if ((mask & (1 << copy) ) != 0) vol->v_subdisks[(disk + copy) % vol->v_disks_count].sd_recovery--; } if (bp->bio_cmd == BIO_WRITE && bp->bio_error) { G_RAID_LOGREQ(0, bp, "Remap write failed: " "failing subdisk."); g_raid_tr_raid1e_fail_disk(sd->sd_softc, sd, sd->sd_disk); bp->bio_error = 0; } G_RAID_LOGREQ(2, bp, "REMAP done %d.", bp->bio_error); g_raid_unlock_range(sd->sd_volume, virtual, bp->bio_length); } if (pbp->bio_cmd != BIO_READ) { if (pbp->bio_inbed == 1 || pbp->bio_error != 0) pbp->bio_error = bp->bio_error; if (pbp->bio_cmd == BIO_WRITE && bp->bio_error != 0) { G_RAID_LOGREQ(0, bp, "Write failed: failing subdisk."); g_raid_tr_raid1e_fail_disk(sd->sd_softc, sd, sd->sd_disk); } error = pbp->bio_error; } else error = bp->bio_error; g_destroy_bio(bp); if (pbp->bio_children == pbp->bio_inbed) { pbp->bio_completed = pbp->bio_length; g_raid_iodone(pbp, error); } } static int g_raid_tr_kerneldump_raid1e(struct g_raid_tr_object *tr, void *virtual, vm_offset_t physical, off_t boffset, size_t blength) { struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct bio_queue_head queue; char *addr; off_t offset, start, length, remain; u_int no, strip_size; int i, error; vol = tr->tro_volume; addr = virtual; strip_size = vol->v_strip_size; V2P(vol, boffset, &no, &offset, &start); remain = blength; bioq_init(&queue); while (remain > 0) { length = MIN(strip_size - start, remain); for (i = 0; i < N; i++) { sd = &vol->v_subdisks[no]; switch (sd->sd_state) { case G_RAID_SUBDISK_S_ACTIVE: case G_RAID_SUBDISK_S_STALE: case G_RAID_SUBDISK_S_RESYNC: break; case G_RAID_SUBDISK_S_REBUILD: if (offset + start >= sd->sd_rebuild_pos) goto nextdisk; break; default: goto nextdisk; } error = g_raid_subdisk_kerneldump(sd, addr, 0, offset + start, length); if (error != 0) return (error); nextdisk: if (++no >= vol->v_disks_count) { no = 0; offset += strip_size; } } remain -= length; addr += length; start = 0; } return (0); } static int 
g_raid_tr_locked_raid1e(struct g_raid_tr_object *tr, void *argp) { struct bio *bp; struct g_raid_subdisk *sd; bp = (struct bio *)argp; sd = (struct g_raid_subdisk *)bp->bio_caller1; g_raid_subdisk_iostart(sd, bp); return (0); } static int g_raid_tr_idle_raid1e(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1e_object *trs; struct g_raid_volume *vol; vol = tr->tro_volume; trs = (struct g_raid_tr_raid1e_object *)tr; trs->trso_fair_io = g_raid1e_rebuild_fair_io; trs->trso_recover_slabs = g_raid1e_rebuild_cluster_idle; /* Compensate short rebuild I/Os. */ if ((vol->v_disks_count % N) != 0 && vol->v_strip_size < g_raid1e_rebuild_slab) { trs->trso_recover_slabs *= g_raid1e_rebuild_slab; trs->trso_recover_slabs /= vol->v_strip_size; } if (trs->trso_type == TR_RAID1E_REBUILD) g_raid_tr_raid1e_rebuild_some(tr); return (0); } static int g_raid_tr_free_raid1e(struct g_raid_tr_object *tr) { struct g_raid_tr_raid1e_object *trs; trs = (struct g_raid_tr_raid1e_object *)tr; if (trs->trso_buffer != NULL) { free(trs->trso_buffer, M_TR_RAID1E); trs->trso_buffer = NULL; } return (0); } G_RAID_TR_DECLARE(raid1e, "RAID1E"); Index: head/sys/geom/raid3/g_raid3.c =================================================================== --- head/sys/geom/raid3/g_raid3.c (revision 350693) +++ head/sys/geom/raid3/g_raid3.c (revision 350694) @@ -1,3586 +1,3587 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include FEATURE(geom_raid3, "GEOM RAID-3 functionality"); static MALLOC_DEFINE(M_RAID3, "raid3_data", "GEOM_RAID3 Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW, 0, "GEOM_RAID3 stuff"); u_int g_raid3_debug = 0; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RWTUN, &g_raid3_debug, 0, "Debug level"); static u_int g_raid3_timeout = 4; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_raid3_timeout, 0, "Time to wait on all raid3 components"); static u_int g_raid3_idletime = 5; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RWTUN, &g_raid3_idletime, 0, "Mark components as clean when idling"); static u_int g_raid3_disconnect_on_failure = 1; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, &g_raid3_disconnect_on_failure, 0, "Disconnect component on I/O failure."); static u_int g_raid3_syncreqs = 2; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, sync_requests, CTLFLAG_RDTUN, &g_raid3_syncreqs, 0, "Parallel synchronization I/O requests."); static u_int g_raid3_use_malloc = 0; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, use_malloc, CTLFLAG_RDTUN, &g_raid3_use_malloc, 0, "Use malloc(9) instead of uma(9)."); static u_int g_raid3_n64k = 50; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RDTUN, &g_raid3_n64k, 0, "Maximum number of 64kB allocations"); static u_int g_raid3_n16k = 200; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RDTUN, &g_raid3_n16k, 0, "Maximum number of 16kB allocations"); static u_int g_raid3_n4k = 1200; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RDTUN, &g_raid3_n4k, 0, "Maximum number of 4kB allocations"); static SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0, "GEOM_RAID3 statistics"); static u_int g_raid3_parity_mismatch = 0; SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD, &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode"); #define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ } while (0) static eventhandler_tag g_raid3_post_sync = NULL; static int g_raid3_shutdown = 0; static int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_raid3_taste; static void g_raid3_init(struct g_class *mp); static void g_raid3_fini(struct g_class *mp); struct g_class g_raid3_class = { .name = G_RAID3_CLASS_NAME, .version = G_VERSION, .ctlreq = g_raid3_config, .taste = g_raid3_taste, .destroy_geom = g_raid3_destroy_geom, .init = g_raid3_init, .fini = g_raid3_fini }; static void g_raid3_destroy_provider(struct g_raid3_softc *sc); static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state); static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force); static void g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type); static int g_raid3_register_request(struct bio *pbp); static void g_raid3_sync_release(struct g_raid3_softc *sc); static const char * g_raid3_disk_state2str(int state) { switch (state) { case G_RAID3_DISK_STATE_NODISK: return ("NODISK"); 
case G_RAID3_DISK_STATE_NONE: return ("NONE"); case G_RAID3_DISK_STATE_NEW: return ("NEW"); case G_RAID3_DISK_STATE_ACTIVE: return ("ACTIVE"); case G_RAID3_DISK_STATE_STALE: return ("STALE"); case G_RAID3_DISK_STATE_SYNCHRONIZING: return ("SYNCHRONIZING"); case G_RAID3_DISK_STATE_DISCONNECTED: return ("DISCONNECTED"); default: return ("INVALID"); } } static const char * g_raid3_device_state2str(int state) { switch (state) { case G_RAID3_DEVICE_STATE_STARTING: return ("STARTING"); case G_RAID3_DEVICE_STATE_DEGRADED: return ("DEGRADED"); case G_RAID3_DEVICE_STATE_COMPLETE: return ("COMPLETE"); default: return ("INVALID"); } } const char * g_raid3_get_diskname(struct g_raid3_disk *disk) { if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) return ("[unknown]"); return (disk->d_name); } static void * g_raid3_alloc(struct g_raid3_softc *sc, size_t size, int flags) { void *ptr; enum g_raid3_zones zone; if (g_raid3_use_malloc || (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES) ptr = malloc(size, M_RAID3, flags); else { ptr = uma_zalloc_arg(sc->sc_zones[zone].sz_zone, &sc->sc_zones[zone], flags); sc->sc_zones[zone].sz_requested++; if (ptr == NULL) sc->sc_zones[zone].sz_failed++; } return (ptr); } static void g_raid3_free(struct g_raid3_softc *sc, void *ptr, size_t size) { enum g_raid3_zones zone; if (g_raid3_use_malloc || (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES) free(ptr, M_RAID3); else { uma_zfree_arg(sc->sc_zones[zone].sz_zone, ptr, &sc->sc_zones[zone]); } } static int g_raid3_uma_ctor(void *mem, int size, void *arg, int flags) { struct g_raid3_zone *sz = arg; if (sz->sz_max > 0 && sz->sz_inuse == sz->sz_max) return (ENOMEM); sz->sz_inuse++; return (0); } static void g_raid3_uma_dtor(void *mem, int size, void *arg) { struct g_raid3_zone *sz = arg; sz->sz_inuse--; } #define g_raid3_xor(src, dst, size) \ _g_raid3_xor((uint64_t *)(src), \ (uint64_t *)(dst), (size_t)size) static void _g_raid3_xor(uint64_t *src, uint64_t *dst, size_t size) { KASSERT((size % 128) == 0, ("Invalid size: %zu.", size)); for (; size > 0; size -= 128) { *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); } } static int g_raid3_is_zero(struct bio *bp) { static const uint64_t zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; u_char *addr; ssize_t size; size = bp->bio_length; addr = (u_char *)bp->bio_data; for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) { if (bcmp(addr, zeros, sizeof(zeros)) != 0) return (0); } return (1); } /* * --- Events handling functions --- * Events in geom_raid3 are used to maintain disks and device status * from one thread to simplify locking. 
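 * An event is allocated and queued on sc_events by g_raid3_event_send()
 * (for example the orphan handler sends G_RAID3_DISK_STATE_DISCONNECTED
 * with G_RAID3_EVENT_DONTWAIT) and the worker thread is woken up.  The
 * worker dequeues events before any regular I/O and applies them through
 * g_raid3_update_disk() and g_raid3_update_device().  A caller that did
 * not pass G_RAID3_EVENT_DONTWAIT sleeps until the worker sets
 * G_RAID3_EVENT_DONE and then picks up the result from e_error.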
*/ static void g_raid3_event_free(struct g_raid3_event *ep) { free(ep, M_RAID3); } int g_raid3_event_send(void *arg, int state, int flags) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct g_raid3_event *ep; int error; ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK); G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep); if ((flags & G_RAID3_EVENT_DEVICE) != 0) { disk = NULL; sc = arg; } else { disk = arg; sc = disk->d_softc; } ep->e_disk = disk; ep->e_state = state; ep->e_flags = flags; ep->e_error = 0; mtx_lock(&sc->sc_events_mtx); TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_events_mtx); G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); mtx_lock(&sc->sc_queue_mtx); wakeup(sc); wakeup(&sc->sc_queue); mtx_unlock(&sc->sc_queue_mtx); if ((flags & G_RAID3_EVENT_DONTWAIT) != 0) return (0); sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep); sx_xunlock(&sc->sc_lock); while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) { mtx_lock(&sc->sc_events_mtx); MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event", hz * 5); } error = ep->e_error; g_raid3_event_free(ep); sx_xlock(&sc->sc_lock); return (error); } static struct g_raid3_event * g_raid3_event_get(struct g_raid3_softc *sc) { struct g_raid3_event *ep; mtx_lock(&sc->sc_events_mtx); ep = TAILQ_FIRST(&sc->sc_events); mtx_unlock(&sc->sc_events_mtx); return (ep); } static void g_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep) { mtx_lock(&sc->sc_events_mtx); TAILQ_REMOVE(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_events_mtx); } static void g_raid3_event_cancel(struct g_raid3_disk *disk) { struct g_raid3_softc *sc; struct g_raid3_event *ep, *tmpep; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); mtx_lock(&sc->sc_events_mtx); TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) continue; if (ep->e_disk != disk) continue; TAILQ_REMOVE(&sc->sc_events, ep, e_next); if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) g_raid3_event_free(ep); else { ep->e_error = ECANCELED; wakeup(ep); } } mtx_unlock(&sc->sc_events_mtx); } /* * Return the number of disks in the given state. * If state is equal to -1, count all connected disks. 
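 * Components in the NODISK state are never counted.  For example,
 * g_raid3_bump_syncid() asserts that
 * g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) is non-zero before
 * bumping the syncid on the remaining active components.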
*/ u_int g_raid3_ndisks(struct g_raid3_softc *sc, int state) { struct g_raid3_disk *disk; u_int n, ndisks; sx_assert(&sc->sc_lock, SX_LOCKED); for (n = ndisks = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; if (state == -1 || disk->d_state == state) ndisks++; } return (ndisks); } static u_int g_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp) { struct bio *bp; u_int nreqs = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_from == cp) nreqs++; } mtx_unlock(&sc->sc_queue_mtx); return (nreqs); } static int g_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp) { if (cp->index > 0) { G_RAID3_DEBUG(2, "I/O requests for %s exist, can't destroy it now.", cp->provider->name); return (1); } if (g_raid3_nrequests(sc, cp) > 0) { G_RAID3_DEBUG(2, "I/O requests for %s in queue, can't destroy it now.", cp->provider->name); return (1); } return (0); } static void g_raid3_destroy_consumer(void *arg, int flags __unused) { struct g_consumer *cp; g_topology_assert(); cp = arg; G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); g_detach(cp); g_destroy_consumer(cp); } static void g_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) { struct g_provider *pp; int retaste_wait; g_topology_assert(); cp->private = NULL; if (g_raid3_is_busy(sc, cp)) return; G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name); pp = cp->provider; retaste_wait = 0; if (cp->acw == 1) { if ((pp->geom->flags & G_GEOM_WITHER) == 0) retaste_wait = 1; } G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, -cp->acw, -cp->ace, 0); if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); if (retaste_wait) { /* * After retaste event was send (inside g_access()), we can send * event to detach and destroy consumer. * A class, which has consumer to the given provider connected * will not receive retaste event for the provider. * This is the way how I ignore retaste events when I close * consumers opened for write: I detach and destroy consumer * after retaste event is sent. */ g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL); return; } G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name); g_detach(cp); g_destroy_consumer(cp); } static int g_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp) { struct g_consumer *cp; int error; g_topology_assert_not(); KASSERT(disk->d_consumer == NULL, ("Disk already connected (device %s).", disk->d_softc->sc_name)); g_topology_lock(); cp = g_new_consumer(disk->d_softc->sc_geom); error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); g_topology_unlock(); return (error); } error = g_access(cp, 1, 1, 1); g_topology_unlock(); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).", pp->name, error); return (error); } disk->d_consumer = cp; disk->d_consumer->private = disk; disk->d_consumer->index = 0; G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk)); return (0); } static void g_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) { g_topology_assert(); if (cp == NULL) return; if (cp->provider != NULL) g_raid3_kill_consumer(sc, cp); else g_destroy_consumer(cp); } /* * Initialize disk. This means allocate memory, create consumer, attach it * to the provider and open access (r1w1e1) to it. 
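 * The g_raid3_disk structure comes from the preallocated sc_disks array
 * (indexed by md->md_no); the consumer is created, attached and opened by
 * g_raid3_connect_disk().  On failure NULL is returned and *errorp, when
 * provided, is set to the error code.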
*/ static struct g_raid3_disk * g_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp, struct g_raid3_metadata *md, int *errorp) { struct g_raid3_disk *disk; int error; disk = &sc->sc_disks[md->md_no]; error = g_raid3_connect_disk(disk, pp); if (error != 0) { if (errorp != NULL) *errorp = error; return (NULL); } disk->d_state = G_RAID3_DISK_STATE_NONE; disk->d_flags = md->md_dflags; if (md->md_provider[0] != '\0') disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED; disk->d_sync.ds_consumer = NULL; disk->d_sync.ds_offset = md->md_sync_offset; disk->d_sync.ds_offset_done = md->md_sync_offset; disk->d_genid = md->md_genid; disk->d_sync.ds_syncid = md->md_syncid; if (errorp != NULL) *errorp = 0; return (disk); } static void g_raid3_destroy_disk(struct g_raid3_disk *disk) { struct g_raid3_softc *sc; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); if (disk->d_state == G_RAID3_DISK_STATE_NODISK) return; g_raid3_event_cancel(disk); switch (disk->d_state) { case G_RAID3_DISK_STATE_SYNCHRONIZING: if (sc->sc_syncdisk != NULL) g_raid3_sync_stop(sc, 1); /* FALLTHROUGH */ case G_RAID3_DISK_STATE_NEW: case G_RAID3_DISK_STATE_STALE: case G_RAID3_DISK_STATE_ACTIVE: g_topology_lock(); g_raid3_disconnect_consumer(sc, disk->d_consumer); g_topology_unlock(); disk->d_consumer = NULL; break; default: KASSERT(0 == 1, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); } disk->d_state = G_RAID3_DISK_STATE_NODISK; } static void g_raid3_destroy_device(struct g_raid3_softc *sc) { struct g_raid3_event *ep; struct g_raid3_disk *disk; struct g_geom *gp; struct g_consumer *cp; u_int n; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); gp = sc->sc_geom; if (sc->sc_provider != NULL) g_raid3_destroy_provider(sc); for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state != G_RAID3_DISK_STATE_NODISK) { disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; g_raid3_update_metadata(disk); g_raid3_destroy_disk(disk); } } while ((ep = g_raid3_event_get(sc)) != NULL) { g_raid3_event_remove(sc, ep); if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) g_raid3_event_free(ep); else { ep->e_error = ECANCELED; ep->e_flags |= G_RAID3_EVENT_DONE; G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep); mtx_lock(&sc->sc_events_mtx); wakeup(ep); mtx_unlock(&sc->sc_events_mtx); } } callout_drain(&sc->sc_callout); cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer); g_topology_lock(); if (cp != NULL) g_raid3_disconnect_consumer(sc, cp); g_wither_geom(sc->sc_sync.ds_geom, ENXIO); G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); g_topology_unlock(); if (!g_raid3_use_malloc) { uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); } mtx_destroy(&sc->sc_queue_mtx); mtx_destroy(&sc->sc_events_mtx); sx_xunlock(&sc->sc_lock); sx_destroy(&sc->sc_lock); } static void g_raid3_orphan(struct g_consumer *cp) { struct g_raid3_disk *disk; g_topology_assert(); disk = cp->private; if (disk == NULL) return; disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); } static int g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) { struct g_raid3_softc *sc; struct g_consumer *cp; off_t offset, length; u_char *sector; int error = 0; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); cp = 
disk->d_consumer; KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); length = cp->provider->sectorsize; offset = cp->provider->mediasize - length; sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO); if (md != NULL) raid3_metadata_encode(md, sector); error = g_write_data(cp, offset, sector, length); free(sector, M_RAID3); if (error != 0) { if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { G_RAID3_DEBUG(0, "Cannot write metadata on %s " "(device=%s, error=%d).", g_raid3_get_diskname(disk), sc->sc_name, error); disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; } else { G_RAID3_DEBUG(1, "Cannot write metadata on %s " "(device=%s, error=%d).", g_raid3_get_diskname(disk), sc->sc_name, error); } if (g_raid3_disconnect_on_failure && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { sc->sc_bump_id |= G_RAID3_BUMP_GENID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); } } return (error); } int g_raid3_clear_metadata(struct g_raid3_disk *disk) { int error; g_topology_assert_not(); sx_assert(&disk->d_softc->sc_lock, SX_LOCKED); error = g_raid3_write_metadata(disk, NULL); if (error == 0) { G_RAID3_DEBUG(2, "Metadata on %s cleared.", g_raid3_get_diskname(disk)); } else { G_RAID3_DEBUG(0, "Cannot clear metadata on disk %s (error=%d).", g_raid3_get_diskname(disk), error); } return (error); } void g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) { struct g_raid3_softc *sc; struct g_provider *pp; sc = disk->d_softc; strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic)); md->md_version = G_RAID3_VERSION; strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); md->md_id = sc->sc_id; md->md_all = sc->sc_ndisks; md->md_genid = sc->sc_genid; md->md_mediasize = sc->sc_mediasize; md->md_sectorsize = sc->sc_sectorsize; md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK); md->md_no = disk->d_no; md->md_syncid = disk->d_sync.ds_syncid; md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK); if (disk->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) md->md_sync_offset = 0; else { md->md_sync_offset = disk->d_sync.ds_offset_done / (sc->sc_ndisks - 1); } if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL) pp = disk->d_consumer->provider; else pp = NULL; if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 && pp != NULL) strlcpy(md->md_provider, pp->name, sizeof(md->md_provider)); else bzero(md->md_provider, sizeof(md->md_provider)); if (pp != NULL) md->md_provsize = pp->mediasize; else md->md_provsize = 0; } void g_raid3_update_metadata(struct g_raid3_disk *disk) { struct g_raid3_softc *sc; struct g_raid3_metadata md; int error; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); g_raid3_fill_metadata(disk, &md); error = g_raid3_write_metadata(disk, &md); if (error == 0) { G_RAID3_DEBUG(2, "Metadata on %s updated.", g_raid3_get_diskname(disk)); } else { G_RAID3_DEBUG(0, "Cannot update metadata on disk %s (error=%d).", g_raid3_get_diskname(disk), error); } } static void g_raid3_bump_syncid(struct g_raid3_softc *sc) { struct g_raid3_disk *disk; u_int n; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, ("%s called with no active disks (device=%s).", __func__, sc->sc_name)); sc->sc_syncid++; G_RAID3_DEBUG(1, "Device 
%s: syncid bumped to %u.", sc->sc_name, sc->sc_syncid); for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { disk->d_sync.ds_syncid = sc->sc_syncid; g_raid3_update_metadata(disk); } } } static void g_raid3_bump_genid(struct g_raid3_softc *sc) { struct g_raid3_disk *disk; u_int n; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, ("%s called with no active disks (device=%s).", __func__, sc->sc_name)); sc->sc_genid++; G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, sc->sc_genid); for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { disk->d_genid = sc->sc_genid; g_raid3_update_metadata(disk); } } } static int g_raid3_idle(struct g_raid3_softc *sc, int acw) { struct g_raid3_disk *disk; u_int i; int timeout; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if (sc->sc_provider == NULL) return (0); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) return (0); if (sc->sc_idle) return (0); if (sc->sc_writes > 0) return (0); if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) { timeout = g_raid3_idletime - (time_uptime - sc->sc_last_write); if (!g_raid3_shutdown && timeout > 0) return (timeout); } sc->sc_idle = 1; for (i = 0; i < sc->sc_ndisks; i++) { disk = &sc->sc_disks[i]; if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) continue; G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", g_raid3_get_diskname(disk), sc->sc_name); disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; g_raid3_update_metadata(disk); } return (0); } static void g_raid3_unidle(struct g_raid3_softc *sc) { struct g_raid3_disk *disk; u_int i; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) return; sc->sc_idle = 0; sc->sc_last_write = time_uptime; for (i = 0; i < sc->sc_ndisks; i++) { disk = &sc->sc_disks[i]; if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) continue; G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", g_raid3_get_diskname(disk), sc->sc_name); disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; g_raid3_update_metadata(disk); } } /* * Treat bio_driver1 field in parent bio as list head and field bio_caller1 * in child bio as pointer to the next element on the list. 
*/ #define G_RAID3_HEAD_BIO(pbp) (pbp)->bio_driver1 #define G_RAID3_NEXT_BIO(cbp) (cbp)->bio_caller1 #define G_RAID3_FOREACH_BIO(pbp, bp) \ for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL; \ (bp) = G_RAID3_NEXT_BIO(bp)) #define G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp) \ for ((bp) = G_RAID3_HEAD_BIO(pbp); \ (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1); \ (bp) = (tmpbp)) static void g_raid3_init_bio(struct bio *pbp) { G_RAID3_HEAD_BIO(pbp) = NULL; } static void g_raid3_remove_bio(struct bio *cbp) { struct bio *pbp, *bp; pbp = cbp->bio_parent; if (G_RAID3_HEAD_BIO(pbp) == cbp) G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); else { G_RAID3_FOREACH_BIO(pbp, bp) { if (G_RAID3_NEXT_BIO(bp) == cbp) { G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); break; } } } G_RAID3_NEXT_BIO(cbp) = NULL; } static void g_raid3_replace_bio(struct bio *sbp, struct bio *dbp) { struct bio *pbp, *bp; g_raid3_remove_bio(sbp); pbp = dbp->bio_parent; G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp); if (G_RAID3_HEAD_BIO(pbp) == dbp) G_RAID3_HEAD_BIO(pbp) = sbp; else { G_RAID3_FOREACH_BIO(pbp, bp) { if (G_RAID3_NEXT_BIO(bp) == dbp) { G_RAID3_NEXT_BIO(bp) = sbp; break; } } } G_RAID3_NEXT_BIO(dbp) = NULL; } static void g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp) { struct bio *bp, *pbp; size_t size; pbp = cbp->bio_parent; pbp->bio_children--; KASSERT(cbp->bio_data != NULL, ("NULL bio_data")); size = pbp->bio_length / (sc->sc_ndisks - 1); g_raid3_free(sc, cbp->bio_data, size); if (G_RAID3_HEAD_BIO(pbp) == cbp) { G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); G_RAID3_NEXT_BIO(cbp) = NULL; g_destroy_bio(cbp); } else { G_RAID3_FOREACH_BIO(pbp, bp) { if (G_RAID3_NEXT_BIO(bp) == cbp) break; } if (bp != NULL) { KASSERT(G_RAID3_NEXT_BIO(bp) != NULL, ("NULL bp->bio_driver1")); G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); G_RAID3_NEXT_BIO(cbp) = NULL; } g_destroy_bio(cbp); } } static struct bio * g_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp) { struct bio *bp, *cbp; size_t size; int memflag; cbp = g_clone_bio(pbp); if (cbp == NULL) return (NULL); size = pbp->bio_length / (sc->sc_ndisks - 1); if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) memflag = M_WAITOK; else memflag = M_NOWAIT; cbp->bio_data = g_raid3_alloc(sc, size, memflag); if (cbp->bio_data == NULL) { pbp->bio_children--; g_destroy_bio(cbp); return (NULL); } G_RAID3_NEXT_BIO(cbp) = NULL; if (G_RAID3_HEAD_BIO(pbp) == NULL) G_RAID3_HEAD_BIO(pbp) = cbp; else { G_RAID3_FOREACH_BIO(pbp, bp) { if (G_RAID3_NEXT_BIO(bp) == NULL) { G_RAID3_NEXT_BIO(bp) = cbp; break; } } } return (cbp); } static void g_raid3_scatter(struct bio *pbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct bio *bp, *cbp, *tmpbp; off_t atom, cadd, padd, left; int first; sc = pbp->bio_to->geom->softc; bp = NULL; if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { /* * Find bio for which we should calculate data. */ G_RAID3_FOREACH_BIO(pbp, cbp) { if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { bp = cbp; break; } } KASSERT(bp != NULL, ("NULL parity bio.")); } atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); cadd = padd = 0; for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { G_RAID3_FOREACH_BIO(pbp, cbp) { if (cbp == bp) continue; bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom); padd += atom; } cadd += atom; } if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { /* * Calculate parity. 
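 * The parity buffer is filled by copying the first data component into
 * the parity child and XOR-ing each remaining data component into it,
 * i.e. P = D0 ^ D1 ^ ... ^ D(ndisks - 2).  Children that were cloned only
 * to take part in this calculation (G_RAID3_BIO_CFLAG_NODISK) are
 * destroyed as soon as they have been consumed.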
*/ first = 1; G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { if (cbp == bp) continue; if (first) { bcopy(cbp->bio_data, bp->bio_data, bp->bio_length); first = 0; } else { g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_length); } if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0) g_raid3_destroy_bio(sc, cbp); } } G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { struct g_consumer *cp; disk = cbp->bio_caller2; cp = disk->d_consumer; cbp->bio_to = cp->provider; G_RAID3_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; sc->sc_writes++; g_io_request(cbp, cp); } } static void g_raid3_gather(struct bio *pbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct bio *xbp, *fbp, *cbp; off_t atom, cadd, padd, left; sc = pbp->bio_to->geom->softc; /* * Find bio for which we have to calculate data. * While going through this path, check if all requests * succeeded, if not, deny whole request. * If we're in COMPLETE mode, we allow one request to fail, * so if we find one, we're sending it to the parity consumer. * If there are more failed requests, we deny whole request. */ xbp = fbp = NULL; G_RAID3_FOREACH_BIO(pbp, cbp) { if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { KASSERT(xbp == NULL, ("More than one parity bio.")); xbp = cbp; } if (cbp->bio_error == 0) continue; /* * Found failed request. */ if (fbp == NULL) { if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { /* * We are already in degraded mode, so we can't * accept any failures. */ if (pbp->bio_error == 0) pbp->bio_error = cbp->bio_error; } else { fbp = cbp; } } else { /* * Next failed request, that's too many. */ if (pbp->bio_error == 0) pbp->bio_error = fbp->bio_error; } disk = cbp->bio_caller2; if (disk == NULL) continue; if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).", cbp->bio_error); } else { G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).", cbp->bio_error); } if (g_raid3_disconnect_on_failure && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { sc->sc_bump_id |= G_RAID3_BUMP_GENID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); } } if (pbp->bio_error != 0) goto finish; if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY; if (xbp != fbp) g_raid3_replace_bio(xbp, fbp); g_raid3_destroy_bio(sc, fbp); } else if (fbp != NULL) { struct g_consumer *cp; /* * One request failed, so send the same request to * the parity consumer. */ disk = pbp->bio_driver2; if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { pbp->bio_error = fbp->bio_error; goto finish; } pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; pbp->bio_inbed--; fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR); if (disk->d_no == sc->sc_ndisks - 1) fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; fbp->bio_error = 0; fbp->bio_completed = 0; fbp->bio_children = 0; fbp->bio_inbed = 0; cp = disk->d_consumer; fbp->bio_caller2 = disk; fbp->bio_to = cp->provider; G_RAID3_LOGREQ(3, fbp, "Sending request (recover)."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(fbp, cp); return; } if (xbp != NULL) { /* * Calculate parity. 
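 * Every data component is XOR-ed into xbp, the child that was flagged as
 * parity.  In VERIFY mode xbp holds the parity actually read from disk,
 * so the result must be all zeroes or a parity mismatch is reported.
 * When the parity component was read in place of a data component
 * (degraded or round-robin reads), the same XOR reconstructs the missing
 * component's data, the PARITY flag is cleared and xbp takes part in the
 * copy-out loop below.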
*/ G_RAID3_FOREACH_BIO(pbp, cbp) { if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) continue; g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_length); } xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { if (!g_raid3_is_zero(xbp)) { g_raid3_parity_mismatch++; pbp->bio_error = EIO; goto finish; } g_raid3_destroy_bio(sc, xbp); } } atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); cadd = padd = 0; for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { G_RAID3_FOREACH_BIO(pbp, cbp) { bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom); pbp->bio_completed += atom; padd += atom; } cadd += atom; } finish: if (pbp->bio_error == 0) G_RAID3_LOGREQ(3, pbp, "Request finished."); else { if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) G_RAID3_LOGREQ(1, pbp, "Verification error."); else G_RAID3_LOGREQ(0, pbp, "Request failed."); } pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK; while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) g_raid3_destroy_bio(sc, cbp); g_io_deliver(pbp, pbp->bio_error); } static void g_raid3_done(struct bio *bp) { struct g_raid3_softc *sc; sc = bp->bio_from->geom->softc; bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR; G_RAID3_LOGREQ(3, bp, "Regular request done (error=%d).", bp->bio_error); mtx_lock(&sc->sc_queue_mtx); bioq_insert_head(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); wakeup(&sc->sc_queue); } static void g_raid3_regular_request(struct bio *cbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct bio *pbp; g_topology_assert_not(); pbp = cbp->bio_parent; sc = pbp->bio_to->geom->softc; cbp->bio_from->index--; if (cbp->bio_cmd == BIO_WRITE) sc->sc_writes--; disk = cbp->bio_from->private; if (disk == NULL) { g_topology_lock(); g_raid3_kill_consumer(sc, cbp->bio_from); g_topology_unlock(); } G_RAID3_LOGREQ(3, cbp, "Request finished."); pbp->bio_inbed++; KASSERT(pbp->bio_inbed <= pbp->bio_children, ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, pbp->bio_children)); if (pbp->bio_inbed != pbp->bio_children) return; switch (pbp->bio_cmd) { case BIO_READ: g_raid3_gather(pbp); break; case BIO_WRITE: case BIO_DELETE: { int error = 0; pbp->bio_completed = pbp->bio_length; while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) { if (cbp->bio_error == 0) { g_raid3_destroy_bio(sc, cbp); continue; } if (error == 0) error = cbp->bio_error; else if (pbp->bio_error == 0) { /* * Next failed request, that's too many. */ pbp->bio_error = error; } disk = cbp->bio_caller2; if (disk == NULL) { g_raid3_destroy_bio(sc, cbp); continue; } if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).", cbp->bio_error); } else { G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).", cbp->bio_error); } if (g_raid3_disconnect_on_failure && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { sc->sc_bump_id |= G_RAID3_BUMP_GENID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); } g_raid3_destroy_bio(sc, cbp); } if (pbp->bio_error == 0) G_RAID3_LOGREQ(3, pbp, "Request finished."); else G_RAID3_LOGREQ(0, pbp, "Request failed."); pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED; pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY; bioq_remove(&sc->sc_inflight, pbp); /* Release delayed sync requests if possible. 
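 * The regular request that just completed was removed from sc_inflight
 * above, so a delayed synchronization bio that was colliding with it may
 * now be able to proceed; give the sync delay queue a chance before the
 * parent request is delivered.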
*/ g_raid3_sync_release(sc); g_io_deliver(pbp, pbp->bio_error); break; } } } static void g_raid3_sync_done(struct bio *bp) { struct g_raid3_softc *sc; G_RAID3_LOGREQ(3, bp, "Synchronization request delivered."); sc = bp->bio_from->geom->softc; bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC; mtx_lock(&sc->sc_queue_mtx); bioq_insert_head(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); wakeup(&sc->sc_queue); } static void g_raid3_flush(struct g_raid3_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_raid3_disk *disk; struct g_consumer *cp; struct bio *cbp; u_int i; bioq_init(&queue); for (i = 0; i < sc->sc_ndisks; i++) { disk = &sc->sc_disks[i]; if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_std_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; } for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); G_RAID3_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); g_io_request(cbp, disk->d_consumer); } } static void g_raid3_start(struct bio *bp) { struct g_raid3_softc *sc; sc = bp->bio_to->geom->softc; /* * If sc == NULL or there are no valid disks, provider's error * should be set and g_raid3_start() should not be called at all. */ KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE), ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_RAID3_LOGREQ(3, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_FLUSH: g_raid3_flush(sc, bp); return; case BIO_GETATTR: default: g_io_deliver(bp, EOPNOTSUPP); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); wakeup(sc); } /* * Return TRUE if the given request is colliding with a in-progress * synchronization request. */ static int g_raid3_sync_collision(struct g_raid3_softc *sc, struct bio *bp) { struct g_raid3_disk *disk; struct bio *sbp; off_t rstart, rend, sstart, send; int i; disk = sc->sc_syncdisk; if (disk == NULL) return (0); rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; for (i = 0; i < g_raid3_syncreqs; i++) { sbp = disk->d_sync.ds_bios[i]; if (sbp == NULL) continue; sstart = sbp->bio_offset; send = sbp->bio_length; if (sbp->bio_cmd == BIO_WRITE) { sstart *= sc->sc_ndisks - 1; send *= sc->sc_ndisks - 1; } send += sstart; if (rend > sstart && rstart < send) return (1); } return (0); } /* * Return TRUE if the given sync request is colliding with a in-progress regular * request. 
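 * The synchronization bio checked here is the READ half of a sync round,
 * whose offset and length are already expressed in provider address
 * space, so they are compared directly against the regular writes tracked
 * on the sc_inflight queue.  (g_raid3_sync_collision() above performs the
 * scaling that the per-component WRITE half needs.)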
*/ static int g_raid3_regular_collision(struct g_raid3_softc *sc, struct bio *sbp) { off_t rstart, rend, sstart, send; struct bio *bp; if (sc->sc_syncdisk == NULL) return (0); sstart = sbp->bio_offset; send = sstart + sbp->bio_length; TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) { rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; if (rend > sstart && rstart < send) return (1); } return (0); } /* * Puts request onto delayed queue. */ static void g_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp) { G_RAID3_LOGREQ(2, bp, "Delaying request."); bioq_insert_head(&sc->sc_regular_delayed, bp); } /* * Puts synchronization request onto delayed queue. */ static void g_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp) { G_RAID3_LOGREQ(2, bp, "Delaying synchronization request."); bioq_insert_tail(&sc->sc_sync_delayed, bp); } /* * Releases delayed regular requests which don't collide anymore with sync * requests. */ static void g_raid3_regular_release(struct g_raid3_softc *sc) { struct bio *bp, *bp2; TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) { if (g_raid3_sync_collision(sc, bp)) continue; bioq_remove(&sc->sc_regular_delayed, bp); G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp); mtx_lock(&sc->sc_queue_mtx); bioq_insert_head(&sc->sc_queue, bp); #if 0 /* * wakeup() is not needed, because this function is called from * the worker thread. */ wakeup(&sc->sc_queue); #endif mtx_unlock(&sc->sc_queue_mtx); } } /* * Releases delayed sync requests which don't collide anymore with regular * requests. */ static void g_raid3_sync_release(struct g_raid3_softc *sc) { struct bio *bp, *bp2; TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) { if (g_raid3_regular_collision(sc, bp)) continue; bioq_remove(&sc->sc_sync_delayed, bp); G_RAID3_LOGREQ(2, bp, "Releasing delayed synchronization request."); g_io_request(bp, bp->bio_from); } } /* * Handle synchronization requests. * Every synchronization request is two-steps process: first, READ request is * send to active provider and then WRITE request (with read data) to the provider * being synchronized. When WRITE is finished, new synchronization request is * send. */ static void g_raid3_sync_request(struct bio *bp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; bp->bio_from->index--; sc = bp->bio_from->geom->softc; disk = bp->bio_from->private; if (disk == NULL) { sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ g_topology_lock(); g_raid3_kill_consumer(sc, bp->bio_from); g_topology_unlock(); free(bp->bio_data, M_RAID3); g_destroy_bio(bp); sx_xlock(&sc->sc_lock); return; } /* * Synchronization request. */ switch (bp->bio_cmd) { case BIO_READ: { struct g_consumer *cp; u_char *dst, *src; off_t left; u_int atom; if (bp->bio_error != 0) { G_RAID3_LOGREQ(0, bp, "Synchronization request failed (error=%d).", bp->bio_error); g_destroy_bio(bp); return; } G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); dst = src = bp->bio_data; if (disk->d_no == sc->sc_ndisks - 1) { u_int n; /* Parity component. */ for (left = bp->bio_length; left > 0; left -= sc->sc_sectorsize) { bcopy(src, dst, atom); src += atom; for (n = 1; n < sc->sc_ndisks - 1; n++) { g_raid3_xor(src, dst, atom); src += atom; } dst += atom; } } else { /* Regular component. 
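 * The READ half fetched a full parent-space range, i.e. the data of all
 * sc_ndisks - 1 data components interleaved sector by sector.  For a
 * regular component only the atom at index d_no within each striped
 * sector belongs to this disk, so those atoms are compacted to the front
 * of the buffer before the request is turned into a WRITE to the disk
 * being synchronized.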
*/ src += atom * disk->d_no; for (left = bp->bio_length; left > 0; left -= sc->sc_sectorsize) { bcopy(src, dst, atom); src += sc->sc_sectorsize; dst += atom; } } bp->bio_driver1 = bp->bio_driver2 = NULL; bp->bio_pflags = 0; bp->bio_offset /= sc->sc_ndisks - 1; bp->bio_length /= sc->sc_ndisks - 1; bp->bio_cmd = BIO_WRITE; bp->bio_cflags = 0; bp->bio_children = bp->bio_inbed = 0; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(bp, cp); return; } case BIO_WRITE: { struct g_raid3_disk_sync *sync; off_t boffset, moffset; void *data; int i; if (bp->bio_error != 0) { G_RAID3_LOGREQ(0, bp, "Synchronization request failed (error=%d).", bp->bio_error); g_destroy_bio(bp); sc->sc_bump_id |= G_RAID3_BUMP_GENID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); return; } G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); sync = &disk->d_sync; if (sync->ds_offset == sc->sc_mediasize / (sc->sc_ndisks - 1) || sync->ds_consumer == NULL || (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { /* Don't send more synchronization requests. */ sync->ds_inflight--; if (sync->ds_bios != NULL) { i = (int)(uintptr_t)bp->bio_caller1; sync->ds_bios[i] = NULL; } free(bp->bio_data, M_RAID3); g_destroy_bio(bp); if (sync->ds_inflight > 0) return; if (sync->ds_consumer == NULL || (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { return; } /* * Disk up-to-date, activate it. */ g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE, G_RAID3_EVENT_DONTWAIT); return; } /* Send next synchronization request. */ data = bp->bio_data; g_reset_bio(bp); bp->bio_cmd = BIO_READ; bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1); bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); sync->ds_offset += bp->bio_length / (sc->sc_ndisks - 1); bp->bio_done = g_raid3_sync_done; bp->bio_data = data; bp->bio_from = sync->ds_consumer; bp->bio_to = sc->sc_provider; G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); sync->ds_consumer->index++; /* * Delay the request if it is colliding with a regular request. */ if (g_raid3_regular_collision(sc, bp)) g_raid3_sync_delay(sc, bp); else g_io_request(bp, sync->ds_consumer); /* Release delayed requests if possible. */ g_raid3_regular_release(sc); /* Find the smallest offset. */ moffset = sc->sc_mediasize; for (i = 0; i < g_raid3_syncreqs; i++) { bp = sync->ds_bios[i]; boffset = bp->bio_offset; if (bp->bio_cmd == BIO_WRITE) boffset *= sc->sc_ndisks - 1; if (boffset < moffset) moffset = boffset; } if (sync->ds_offset_done + (MAXPHYS * 100) < moffset) { /* Update offset_done on every 100 blocks. 
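 * moffset is the smallest offset any of the parallel synchronization bios
 * is still working on (WRITE offsets scaled back to parent space), so
 * everything below it is known to be synchronized and may be recorded in
 * the on-disk metadata as the point to restart from.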
*/ sync->ds_offset_done = moffset; g_raid3_update_metadata(disk); } return; } default: KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", bp->bio_cmd, sc->sc_name)); break; } } static int g_raid3_register_request(struct bio *pbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct g_consumer *cp; struct bio *cbp, *tmpbp; off_t offset, length; u_int n, ndisks; int round_robin, verify; ndisks = 0; sc = pbp->bio_to->geom->softc; if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 && sc->sc_syncdisk == NULL) { g_io_deliver(pbp, EIO); return (0); } g_raid3_init_bio(pbp); length = pbp->bio_length / (sc->sc_ndisks - 1); offset = pbp->bio_offset / (sc->sc_ndisks - 1); round_robin = verify = 0; switch (pbp->bio_cmd) { case BIO_READ: if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY; verify = 1; ndisks = sc->sc_ndisks; } else { verify = 0; ndisks = sc->sc_ndisks - 1; } if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { round_robin = 1; } else { round_robin = 0; } KASSERT(!round_robin || !verify, ("ROUND-ROBIN and VERIFY are mutually exclusive.")); pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1]; break; case BIO_WRITE: case BIO_DELETE: /* * Delay the request if it is colliding with a synchronization * request. */ if (g_raid3_sync_collision(sc, pbp)) { g_raid3_regular_delay(sc, pbp); return (0); } if (sc->sc_idle) g_raid3_unidle(sc); else sc->sc_last_write = time_uptime; ndisks = sc->sc_ndisks; break; } for (n = 0; n < ndisks; n++) { disk = &sc->sc_disks[n]; cbp = g_raid3_clone_bio(sc, pbp); if (cbp == NULL) { while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) g_raid3_destroy_bio(sc, cbp); /* * To prevent deadlock, we must run back up * with the ENOMEM for failed requests of any * of our consumers. Our own sync requests * can stick around, as they are finite. */ if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) { g_io_deliver(pbp, ENOMEM); return (0); } return (ENOMEM); } cbp->bio_offset = offset; cbp->bio_length = length; cbp->bio_done = g_raid3_done; switch (pbp->bio_cmd) { case BIO_READ: if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { /* * Replace invalid component with the parity * component. */ disk = &sc->sc_disks[sc->sc_ndisks - 1]; cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; } else if (round_robin && disk->d_no == sc->sc_round_robin) { /* * In round-robin mode skip one data component * and use parity component when reading. */ pbp->bio_driver2 = disk; disk = &sc->sc_disks[sc->sc_ndisks - 1]; cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; sc->sc_round_robin++; round_robin = 0; } else if (verify && disk->d_no == sc->sc_ndisks - 1) { cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; } break; case BIO_WRITE: case BIO_DELETE: if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { if (n == ndisks - 1) { /* * Active parity component, mark it as such. */ cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; } } else { pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; if (n == ndisks - 1) { /* * Parity component is not connected, * so destroy its request. 
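 * The G_RAID3_BIO_PFLAG_NOPARITY flag set below tells g_raid3_scatter()
 * to skip the parity calculation for this request altogether.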
*/ pbp->bio_pflags |= G_RAID3_BIO_PFLAG_NOPARITY; g_raid3_destroy_bio(sc, cbp); cbp = NULL; } else { cbp->bio_cflags |= G_RAID3_BIO_CFLAG_NODISK; disk = NULL; } } break; } if (cbp != NULL) cbp->bio_caller2 = disk; } switch (pbp->bio_cmd) { case BIO_READ: if (round_robin) { /* * If we are in round-robin mode and 'round_robin' is * still 1, it means, that we skipped parity component * for this read and must reset sc_round_robin field. */ sc->sc_round_robin = 0; } G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { disk = cbp->bio_caller2; cp = disk->d_consumer; cbp->bio_to = cp->provider; G_RAID3_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(cbp, cp); } break; case BIO_WRITE: case BIO_DELETE: /* * Put request onto inflight queue, so we can check if new * synchronization requests don't collide with it. */ bioq_insert_tail(&sc->sc_inflight, pbp); /* * Bump syncid on first write. */ if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) { sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; g_raid3_bump_syncid(sc); } g_raid3_scatter(pbp); break; } return (0); } static int g_raid3_can_destroy(struct g_raid3_softc *sc) { struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); gp = sc->sc_geom; if (gp->softc == NULL) return (1); LIST_FOREACH(cp, &gp->consumer, consumer) { if (g_raid3_is_busy(sc, cp)) return (0); } gp = sc->sc_sync.ds_geom; LIST_FOREACH(cp, &gp->consumer, consumer) { if (g_raid3_is_busy(sc, cp)) return (0); } G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.", sc->sc_name); return (1); } static int g_raid3_try_destroy(struct g_raid3_softc *sc) { g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if (sc->sc_rootmount != NULL) { G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } g_topology_lock(); if (!g_raid3_can_destroy(sc)) { g_topology_unlock(); return (0); } sc->sc_geom->softc = NULL; sc->sc_sync.ds_geom->softc = NULL; if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) { g_topology_unlock(); G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, &sc->sc_worker); /* Unlock sc_lock here, as it can be destroyed after wakeup. */ sx_xunlock(&sc->sc_lock); wakeup(&sc->sc_worker); sc->sc_worker = NULL; } else { g_topology_unlock(); g_raid3_destroy_device(sc); free(sc->sc_disks, M_RAID3); free(sc, M_RAID3); } return (1); } /* * Worker thread. */ static void g_raid3_worker(void *arg) { struct g_raid3_softc *sc; struct g_raid3_event *ep; struct bio *bp; int timeout; sc = arg; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { G_RAID3_DEBUG(5, "%s: Let's see...", __func__); /* * First take a look at events. * This is important to handle events before any I/O requests. */ ep = g_raid3_event_get(sc); if (ep != NULL) { g_raid3_event_remove(sc, ep); if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) { /* Update only device status. */ G_RAID3_DEBUG(3, "Running event for device %s.", sc->sc_name); ep->e_error = 0; g_raid3_update_device(sc, 1); } else { /* Update disk status. 
*/ G_RAID3_DEBUG(3, "Running event for disk %s.", g_raid3_get_diskname(ep->e_disk)); ep->e_error = g_raid3_update_disk(ep->e_disk, ep->e_state); if (ep->e_error == 0) g_raid3_update_device(sc, 0); } if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) { KASSERT(ep->e_error == 0, ("Error cannot be handled.")); g_raid3_event_free(ep); } else { ep->e_flags |= G_RAID3_EVENT_DONE; G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep); mtx_lock(&sc->sc_events_mtx); wakeup(ep); mtx_unlock(&sc->sc_events_mtx); } if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { if (g_raid3_try_destroy(sc)) { curthread->td_pflags &= ~TDP_GEOM; G_RAID3_DEBUG(1, "Thread exiting."); kproc_exit(0); } } G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__); continue; } /* * Check if we can mark array as CLEAN and if we can't take * how much seconds should we wait. */ timeout = g_raid3_idle(sc, -1); /* * Now I/O requests. */ /* Get first request from the queue. */ mtx_lock(&sc->sc_queue_mtx); bp = bioq_first(&sc->sc_queue); if (bp == NULL) { if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { mtx_unlock(&sc->sc_queue_mtx); if (g_raid3_try_destroy(sc)) { curthread->td_pflags &= ~TDP_GEOM; G_RAID3_DEBUG(1, "Thread exiting."); kproc_exit(0); } mtx_lock(&sc->sc_queue_mtx); } sx_xunlock(&sc->sc_lock); /* * XXX: We can miss an event here, because an event * can be added without sx-device-lock and without * mtx-queue-lock. Maybe I should just stop using * dedicated mutex for events synchronization and * stick with the queue lock? * The event will hang here until next I/O request * or next event is received. */ MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w1", timeout * hz); sx_xlock(&sc->sc_lock); G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__); continue; } process: bioq_remove(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_from->geom == sc->sc_sync.ds_geom && (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) { g_raid3_sync_request(bp); /* READ */ } else if (bp->bio_to != sc->sc_provider) { if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) g_raid3_regular_request(bp); else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) g_raid3_sync_request(bp); /* WRITE */ else { KASSERT(0, ("Invalid request cflags=0x%hx to=%s.", bp->bio_cflags, bp->bio_to->name)); } } else if (g_raid3_register_request(bp) != 0) { mtx_lock(&sc->sc_queue_mtx); bioq_insert_head(&sc->sc_queue, bp); /* * We are short in memory, let see if there are finished * request we can free. */ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) goto process; } /* * No finished regular request, so at least keep * synchronization running. 
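 * Keeping the rebuild moving is preferable to stalling it behind a
 * regular request that is itself waiting for memory; synchronization
 * reads use buffers preallocated in g_raid3_sync_start(), so passing
 * them through should not deepen the shortage.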
*/ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) goto process; } sx_xunlock(&sc->sc_lock); MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:lowmem", hz / 10); sx_xlock(&sc->sc_lock); } G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__); } } static void g_raid3_update_idle(struct g_raid3_softc *sc, struct g_raid3_disk *disk) { sx_assert(&sc->sc_lock, SX_LOCKED); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) return; if (!sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", g_raid3_get_diskname(disk), sc->sc_name); disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; } else if (sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) { G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", g_raid3_get_diskname(disk), sc->sc_name); disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; } } static void g_raid3_sync_start(struct g_raid3_softc *sc) { struct g_raid3_disk *disk; struct g_consumer *cp; struct bio *bp; int error; u_int n; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, ("Device not in DEGRADED state (%s, %u).", sc->sc_name, sc->sc_state)); KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).", sc->sc_name, sc->sc_state)); disk = NULL; for (n = 0; n < sc->sc_ndisks; n++) { if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) continue; disk = &sc->sc_disks[n]; break; } if (disk == NULL) return; sx_xunlock(&sc->sc_lock); g_topology_lock(); cp = g_new_consumer(sc->sc_sync.ds_geom); error = g_attach(cp, sc->sc_provider); KASSERT(error == 0, ("Cannot attach to %s (error=%d).", sc->sc_name, error)); error = g_access(cp, 1, 0, 0); KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); g_topology_unlock(); sx_xlock(&sc->sc_lock); G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, g_raid3_get_diskname(disk)); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) == 0) disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; KASSERT(disk->d_sync.ds_consumer == NULL, ("Sync consumer already exists (device=%s, disk=%s).", sc->sc_name, g_raid3_get_diskname(disk))); disk->d_sync.ds_consumer = cp; disk->d_sync.ds_consumer->private = disk; disk->d_sync.ds_consumer->index = 0; sc->sc_syncdisk = disk; /* * Allocate memory for synchronization bios and initialize them. */ disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_raid3_syncreqs, M_RAID3, M_WAITOK); for (n = 0; n < g_raid3_syncreqs; n++) { bp = g_alloc_bio(); disk->d_sync.ds_bios[n] = bp; bp->bio_parent = NULL; bp->bio_cmd = BIO_READ; bp->bio_data = malloc(MAXPHYS, M_RAID3, M_WAITOK); bp->bio_cflags = 0; bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1); bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1); bp->bio_done = g_raid3_sync_done; bp->bio_from = disk->d_sync.ds_consumer; bp->bio_to = sc->sc_provider; bp->bio_caller1 = (void *)(uintptr_t)n; } /* Set the number of in-flight synchronization requests. */ disk->d_sync.ds_inflight = g_raid3_syncreqs; /* * Fire off first synchronization requests. */ for (n = 0; n < g_raid3_syncreqs; n++) { bp = disk->d_sync.ds_bios[n]; G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); disk->d_sync.ds_consumer->index++; /* * Delay the request if it is colliding with a regular request. 
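 * A delayed synchronization request is re-issued once the colliding
 * regular write has left the inflight queue.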
*/ if (g_raid3_regular_collision(sc, bp)) g_raid3_sync_delay(sc, bp); else g_io_request(bp, disk->d_sync.ds_consumer); } } /* * Stop synchronization process. * type: 0 - synchronization finished * 1 - synchronization stopped */ static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type) { struct g_raid3_disk *disk; struct g_consumer *cp; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_LOCKED); KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, ("Device not in DEGRADED state (%s, %u).", sc->sc_name, sc->sc_state)); disk = sc->sc_syncdisk; sc->sc_syncdisk = NULL; KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name)); KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); if (disk->d_sync.ds_consumer == NULL) return; if (type == 0) { G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.", sc->sc_name, g_raid3_get_diskname(disk)); } else /* if (type == 1) */ { G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.", sc->sc_name, g_raid3_get_diskname(disk)); } free(disk->d_sync.ds_bios, M_RAID3); disk->d_sync.ds_bios = NULL; cp = disk->d_sync.ds_consumer; disk->d_sync.ds_consumer = NULL; disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ g_topology_lock(); g_raid3_kill_consumer(sc, cp); g_topology_unlock(); sx_xlock(&sc->sc_lock); } static void g_raid3_launch_provider(struct g_raid3_softc *sc) { struct g_provider *pp; struct g_raid3_disk *disk; int n; sx_assert(&sc->sc_lock, SX_LOCKED); g_topology_lock(); pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name); pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; pp->stripesize = 0; pp->stripeoffset = 0; for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_consumer && disk->d_consumer->provider && disk->d_consumer->provider->stripesize > pp->stripesize) { pp->stripesize = disk->d_consumer->provider->stripesize; pp->stripeoffset = disk->d_consumer->provider->stripeoffset; } } pp->stripesize *= sc->sc_ndisks - 1; pp->stripeoffset *= sc->sc_ndisks - 1; sc->sc_provider = pp; g_error_provider(pp, 0); g_topology_unlock(); G_RAID3_DEBUG(0, "Device %s launched (%u/%u).", pp->name, g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE), sc->sc_ndisks); if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED) g_raid3_sync_start(sc); } static void g_raid3_destroy_provider(struct g_raid3_softc *sc) { struct bio *bp; g_topology_assert_not(); KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", sc->sc_name)); g_topology_lock(); g_error_provider(sc->sc_provider, ENXIO); mtx_lock(&sc->sc_queue_mtx); while ((bp = bioq_first(&sc->sc_queue)) != NULL) { bioq_remove(&sc->sc_queue, bp); g_io_deliver(bp, ENXIO); } mtx_unlock(&sc->sc_queue_mtx); G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); g_topology_unlock(); sc->sc_provider = NULL; if (sc->sc_syncdisk != NULL) g_raid3_sync_stop(sc, 1); } static void g_raid3_go(void *arg) { struct g_raid3_softc *sc; sc = arg; G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); g_raid3_event_send(sc, 0, G_RAID3_EVENT_DONTWAIT | G_RAID3_EVENT_DEVICE); } static u_int g_raid3_determine_state(struct g_raid3_disk *disk) { struct g_raid3_softc *sc; u_int state; sc = disk->d_softc; if (sc->sc_syncid == disk->d_sync.ds_syncid) { if ((disk->d_flags & G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) { /* Disk 
does not need synchronization. */ state = G_RAID3_DISK_STATE_ACTIVE; } else { if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { /* * We can start synchronization from * the stored offset. */ state = G_RAID3_DISK_STATE_SYNCHRONIZING; } else { state = G_RAID3_DISK_STATE_STALE; } } } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { /* * Reset all synchronization data for this disk, * because if it even was synchronized, it was * synchronized to disks with different syncid. */ disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; disk->d_sync.ds_offset = 0; disk->d_sync.ds_offset_done = 0; disk->d_sync.ds_syncid = sc->sc_syncid; if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { state = G_RAID3_DISK_STATE_SYNCHRONIZING; } else { state = G_RAID3_DISK_STATE_STALE; } } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { /* * Not good, NOT GOOD! * It means that device was started on stale disks * and more fresh disk just arrive. * If there were writes, device is broken, sorry. * I think the best choice here is don't touch * this disk and inform the user loudly. */ G_RAID3_DEBUG(0, "Device %s was started before the freshest " "disk (%s) arrives!! It will not be connected to the " "running device.", sc->sc_name, g_raid3_get_diskname(disk)); g_raid3_destroy_disk(disk); state = G_RAID3_DISK_STATE_NONE; /* Return immediately, because disk was destroyed. */ return (state); } G_RAID3_DEBUG(3, "State for %s disk: %s.", g_raid3_get_diskname(disk), g_raid3_disk_state2str(state)); return (state); } /* * Update device state. */ static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force) { struct g_raid3_disk *disk; u_int state; sx_assert(&sc->sc_lock, SX_XLOCKED); switch (sc->sc_state) { case G_RAID3_DEVICE_STATE_STARTING: { u_int n, ndirty, ndisks, genid, syncid; KASSERT(sc->sc_provider == NULL, ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); /* * Are we ready? We are, if all disks are connected or * one disk is missing and 'force' is true. */ if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) { if (!force) callout_drain(&sc->sc_callout); } else { if (force) { /* * Timeout expired, so destroy device. */ sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } return; } /* * Find the biggest genid. */ genid = 0; for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; if (disk->d_genid > genid) genid = disk->d_genid; } sc->sc_genid = genid; /* * Remove all disks without the biggest genid. */ for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; if (disk->d_genid < genid) { G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", g_raid3_get_diskname(disk), sc->sc_name); g_raid3_destroy_disk(disk); } } /* * There must be at least 'sc->sc_ndisks - 1' components * with the same syncid and without SYNCHRONIZING flag. */ /* * Find the biggest syncid, number of valid components and * number of dirty components. 
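 * A component counts as valid only if it carries the highest syncid
 * seen and is not marked SYNCHRONIZING.  For example, a
 * three-component array with syncids {4, 4, 3} yields ndisks == 2,
 * which still satisfies the "at least sc_ndisks - 1" requirement
 * checked below.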
*/ ndirty = ndisks = syncid = 0; for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) ndirty++; if (disk->d_sync.ds_syncid > syncid) { syncid = disk->d_sync.ds_syncid; ndisks = 0; } else if (disk->d_sync.ds_syncid < syncid) { continue; } if ((disk->d_flags & G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) { continue; } ndisks++; } /* * Do we have enough valid components? */ if (ndisks + 1 < sc->sc_ndisks) { G_RAID3_DEBUG(0, "Device %s is broken, too few valid components.", sc->sc_name); sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; return; } /* * If there is one DIRTY component and all disks are present, * mark it for synchronization. If there is more than one DIRTY * component, mark parity component for synchronization. */ if (ndisks == sc->sc_ndisks && ndirty == 1) { for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { continue; } disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; } } else if (ndisks == sc->sc_ndisks && ndirty > 1) { disk = &sc->sc_disks[sc->sc_ndisks - 1]; disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; } sc->sc_syncid = syncid; if (force) { /* Remember to bump syncid on first write. */ sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; } if (ndisks == sc->sc_ndisks) state = G_RAID3_DEVICE_STATE_COMPLETE; else /* if (ndisks == sc->sc_ndisks - 1) */ state = G_RAID3_DEVICE_STATE_DEGRADED; G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_device_state2str(state)); sc->sc_state = state; for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; state = g_raid3_determine_state(disk); g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT); if (state == G_RAID3_DISK_STATE_STALE) sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; } break; } case G_RAID3_DEVICE_STATE_DEGRADED: /* * Genid need to be bumped immediately, so do it here. */ if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; g_raid3_bump_genid(sc); } if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) return; if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) { if (sc->sc_provider != NULL) g_raid3_destroy_provider(sc); sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; return; } if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == sc->sc_ndisks) { state = G_RAID3_DEVICE_STATE_COMPLETE; G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_device_state2str(state)); sc->sc_state = state; } if (sc->sc_provider == NULL) g_raid3_launch_provider(sc); if (sc->sc_rootmount != NULL) { G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } break; case G_RAID3_DEVICE_STATE_COMPLETE: /* * Genid need to be bumped immediately, so do it here. 
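 * Components with an older genid are dropped in the STARTING case
 * above and rejected by g_raid3_add_disk(), so bumping the genid
 * prevents a failed component from being accepted back with stale
 * data.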
*/ if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; g_raid3_bump_genid(sc); } if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) return; KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >= sc->sc_ndisks - 1, ("Too few ACTIVE components in COMPLETE state (device %s).", sc->sc_name)); if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == sc->sc_ndisks - 1) { state = G_RAID3_DEVICE_STATE_DEGRADED; G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_device_state2str(state)); sc->sc_state = state; } if (sc->sc_provider == NULL) g_raid3_launch_provider(sc); if (sc->sc_rootmount != NULL) { G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } break; default: KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state))); break; } } /* * Update disk state and device state if needed. */ #define DISK_STATE_CHANGED() G_RAID3_DEBUG(1, \ "Disk %s state changed from %s to %s (device %s).", \ g_raid3_get_diskname(disk), \ g_raid3_disk_state2str(disk->d_state), \ g_raid3_disk_state2str(state), sc->sc_name) static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state) { struct g_raid3_softc *sc; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); again: G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state), g_raid3_disk_state2str(state)); switch (state) { case G_RAID3_DISK_STATE_NEW: /* * Possible scenarios: * 1. New disk arrive. */ /* Previous state should be NONE. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); disk->d_state = state; G_RAID3_DEBUG(1, "Device %s: provider %s detected.", sc->sc_name, g_raid3_get_diskname(disk)); if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) break; KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); state = g_raid3_determine_state(disk); if (state != G_RAID3_DISK_STATE_NONE) goto again; break; case G_RAID3_DISK_STATE_ACTIVE: /* * Possible scenarios: * 1. New disk does not need synchronization. * 2. Synchronization process finished successfully. */ KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); /* Previous state should be NEW or SYNCHRONIZING. 
*/ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING; disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC; g_raid3_sync_stop(sc, 0); } disk->d_state = state; disk->d_sync.ds_offset = 0; disk->d_sync.ds_offset_done = 0; g_raid3_update_idle(sc, disk); g_raid3_update_metadata(disk); G_RAID3_DEBUG(1, "Device %s: provider %s activated.", sc->sc_name, g_raid3_get_diskname(disk)); break; case G_RAID3_DISK_STATE_STALE: /* * Possible scenarios: * 1. Stale disk was connected. */ /* Previous state should be NEW. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); /* * STALE state is only possible if device is marked * NOAUTOSYNC. */ KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; disk->d_state = state; g_raid3_update_metadata(disk); G_RAID3_DEBUG(0, "Device %s: provider %s is stale.", sc->sc_name, g_raid3_get_diskname(disk)); break; case G_RAID3_DISK_STATE_SYNCHRONIZING: /* * Possible scenarios: * 1. Disk which needs synchronization was connected. */ /* Previous state should be NEW. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); if (disk->d_state == G_RAID3_DISK_STATE_NEW) disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; disk->d_state = state; if (sc->sc_provider != NULL) { g_raid3_sync_start(sc); g_raid3_update_metadata(disk); } break; case G_RAID3_DISK_STATE_DISCONNECTED: /* * Possible scenarios: * 1. Device wasn't running yet, but disk disappear. * 2. Disk was active and disapppear. * 3. Disk disappear during synchronization process. */ if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { /* * Previous state should be ACTIVE, STALE or * SYNCHRONIZING. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE || disk->d_state == G_RAID3_DISK_STATE_STALE || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); } else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) { /* Previous state should be NEW. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); /* * Reset bumping syncid if disk disappeared in STARTING * state. 
*/ if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; #ifdef INVARIANTS } else { KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); #endif } DISK_STATE_CHANGED(); G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.", sc->sc_name, g_raid3_get_diskname(disk)); g_raid3_destroy_disk(disk); break; default: KASSERT(1 == 0, ("Unknown state (%u).", state)); break; } return (0); } #undef DISK_STATE_CHANGED int g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); /* Metadata are stored on last sector. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).", cp->provider->name, error); return (error); } /* Decode metadata. */ error = raid3_metadata_decode(buf, md); g_free(buf); if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0) return (EINVAL); if (md->md_version > G_RAID3_VERSION) { G_RAID3_DEBUG(0, "Kernel module is too old to handle metadata from %s.", cp->provider->name); return (EINVAL); } if (error != 0) { G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", cp->provider->name); return (error); } if (md->md_sectorsize > MAXPHYS) { G_RAID3_DEBUG(0, "The blocksize is too big."); return (EINVAL); } return (0); } static int g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp, struct g_raid3_metadata *md) { if (md->md_no >= sc->sc_ndisks) { G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.", pp->name, md->md_no); return (EINVAL); } if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) { G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.", pp->name, md->md_no); return (EEXIST); } if (md->md_all != sc->sc_ndisks) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_all", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mediasize % md->md_sectorsize) != 0) { G_RAID3_DEBUG(1, "Invalid metadata (mediasize %% sectorsize != " "0) on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if (md->md_mediasize != sc->sc_mediasize) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_mediasize", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_mediasize", pp->name, sc->sc_name); return (EINVAL); } if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) { G_RAID3_DEBUG(1, "Invalid size of disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_sectorsize", pp->name, sc->sc_name); return (EINVAL); } if (md->md_sectorsize != sc->sc_sectorsize) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_sectorsize", pp->name, sc->sc_name); return (EINVAL); } if ((sc->sc_sectorsize % pp->sectorsize) != 0) { G_RAID3_DEBUG(1, "Invalid sector size of disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) { 
G_RAID3_DEBUG(1, "Invalid device flags on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { /* * VERIFY and ROUND-ROBIN options are mutally exclusive. */ G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on " "disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) { G_RAID3_DEBUG(1, "Invalid disk flags on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } return (0); } int g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp, struct g_raid3_metadata *md) { struct g_raid3_disk *disk; int error; g_topology_assert_not(); G_RAID3_DEBUG(2, "Adding disk %s.", pp->name); error = g_raid3_check_metadata(sc, pp, md); if (error != 0) return (error); if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING && md->md_genid < sc->sc_genid) { G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", pp->name, sc->sc_name); return (EINVAL); } disk = g_raid3_init_disk(sc, pp, md, &error); if (disk == NULL) return (error); error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW, G_RAID3_EVENT_WAIT); if (error != 0) return (error); if (md->md_version < G_RAID3_VERSION) { G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", pp->name, md->md_version, G_RAID3_VERSION); g_raid3_update_metadata(disk); } return (0); } static void g_raid3_destroy_delayed(void *arg, int flag) { struct g_raid3_softc *sc; int error; if (flag == EV_CANCEL) { G_RAID3_DEBUG(1, "Destroying canceled."); return; } sc = arg; g_topology_unlock(); sx_xlock(&sc->sc_lock); KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) == 0, ("DESTROY flag set on %s.", sc->sc_name)); KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0, ("DESTROYING flag not set on %s.", sc->sc_name)); G_RAID3_DEBUG(0, "Destroying %s (delayed).", sc->sc_name); error = g_raid3_destroy(sc, G_RAID3_DESTROY_SOFT); if (error != 0) { G_RAID3_DEBUG(0, "Cannot destroy %s.", sc->sc_name); sx_xunlock(&sc->sc_lock); } g_topology_lock(); } static int g_raid3_access(struct g_provider *pp, int acr, int acw, int ace) { struct g_raid3_softc *sc; int dcr, dcw, dce, error = 0; g_topology_assert(); G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, acw, ace); sc = pp->geom->softc; if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0) return (0); KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); dcr = pp->acr + acr; dcw = pp->acw + acw; dce = pp->ace + ace; g_topology_unlock(); sx_xlock(&sc->sc_lock); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0 || g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) { if (acr > 0 || acw > 0 || ace > 0) error = ENXIO; goto end; } if (dcw == 0) g_raid3_idle(sc, dcw); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0) { if (acr > 0 || acw > 0 || ace > 0) { error = ENXIO; goto end; } if (dcr == 0 && dcw == 0 && dce == 0) { g_post_event(g_raid3_destroy_delayed, sc, M_WAITOK, sc, NULL); } } end: sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } static struct g_geom * g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) { struct g_raid3_softc *sc; struct g_geom *gp; int error, timeout; u_int n; g_topology_assert(); G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* One disk is minimum. */ if (md->md_all < 1) return (NULL); /* * Action geom. 
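 * The action geom carries the regular data path; the raid3/<name>
 * provider is created on it later by g_raid3_launch_provider().
 * The second geom set up below handles only synchronization
 * requests.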
*/ gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO); sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3, M_WAITOK | M_ZERO); gp->start = g_raid3_start; gp->orphan = g_raid3_orphan; gp->access = g_raid3_access; gp->dumpconf = g_raid3_dumpconf; sc->sc_id = md->md_id; sc->sc_mediasize = md->md_mediasize; sc->sc_sectorsize = md->md_sectorsize; sc->sc_ndisks = md->md_all; sc->sc_round_robin = 0; sc->sc_flags = md->md_mflags; sc->sc_bump_id = 0; sc->sc_idle = 1; sc->sc_last_write = time_uptime; sc->sc_writes = 0; for (n = 0; n < sc->sc_ndisks; n++) { sc->sc_disks[n].d_softc = sc; sc->sc_disks[n].d_no = n; sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK; } sx_init(&sc->sc_lock, "graid3:lock"); bioq_init(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF); bioq_init(&sc->sc_regular_delayed); bioq_init(&sc->sc_inflight); bioq_init(&sc->sc_sync_delayed); TAILQ_INIT(&sc->sc_events); mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF); callout_init(&sc->sc_callout, 1); sc->sc_state = G_RAID3_DEVICE_STATE_STARTING; gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; /* * Synchronization geom. */ gp = g_new_geomf(mp, "%s.sync", md->md_name); gp->softc = sc; gp->orphan = g_raid3_orphan; sc->sc_sync.ds_geom = gp; if (!g_raid3_use_malloc) { sc->sc_zones[G_RAID3_ZONE_64K].sz_zone = uma_zcreate("gr3:64k", 65536, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); sc->sc_zones[G_RAID3_ZONE_64K].sz_inuse = 0; sc->sc_zones[G_RAID3_ZONE_64K].sz_max = g_raid3_n64k; sc->sc_zones[G_RAID3_ZONE_64K].sz_requested = sc->sc_zones[G_RAID3_ZONE_64K].sz_failed = 0; sc->sc_zones[G_RAID3_ZONE_16K].sz_zone = uma_zcreate("gr3:16k", 16384, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); sc->sc_zones[G_RAID3_ZONE_16K].sz_inuse = 0; sc->sc_zones[G_RAID3_ZONE_16K].sz_max = g_raid3_n16k; sc->sc_zones[G_RAID3_ZONE_16K].sz_requested = sc->sc_zones[G_RAID3_ZONE_16K].sz_failed = 0; sc->sc_zones[G_RAID3_ZONE_4K].sz_zone = uma_zcreate("gr3:4k", 4096, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); sc->sc_zones[G_RAID3_ZONE_4K].sz_inuse = 0; sc->sc_zones[G_RAID3_ZONE_4K].sz_max = g_raid3_n4k; sc->sc_zones[G_RAID3_ZONE_4K].sz_requested = sc->sc_zones[G_RAID3_ZONE_4K].sz_failed = 0; } error = kproc_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0, "g_raid3 %s", md->md_name); if (error != 0) { G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.", sc->sc_name); if (!g_raid3_use_malloc) { uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); } g_destroy_geom(sc->sc_sync.ds_geom); mtx_destroy(&sc->sc_events_mtx); mtx_destroy(&sc->sc_queue_mtx); sx_destroy(&sc->sc_lock); g_destroy_geom(sc->sc_geom); free(sc->sc_disks, M_RAID3); free(sc, M_RAID3); return (NULL); } G_RAID3_DEBUG(1, "Device %s created (%u components, id=%u).", sc->sc_name, sc->sc_ndisks, sc->sc_id); sc->sc_rootmount = root_mount_hold("GRAID3"); G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); /* * Run timeout. 
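 * If the remaining components do not appear within g_raid3_timeout
 * seconds, g_raid3_go() forces a device update, which either starts
 * the array in degraded mode or schedules it for destruction (see
 * the STARTING case in g_raid3_update_device()).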
*/ timeout = atomic_load_acq_int(&g_raid3_timeout); callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc); return (sc->sc_geom); } int g_raid3_destroy(struct g_raid3_softc *sc, int how) { struct g_provider *pp; g_topology_assert_not(); if (sc == NULL) return (ENXIO); sx_assert(&sc->sc_lock, SX_XLOCKED); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { switch (how) { case G_RAID3_DESTROY_SOFT: G_RAID3_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); case G_RAID3_DESTROY_DELAYED: G_RAID3_DEBUG(1, "Device %s will be destroyed on last close.", pp->name); if (sc->sc_syncdisk != NULL) g_raid3_sync_stop(sc, 1); sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROYING; return (EBUSY); case G_RAID3_DESTROY_HARD: G_RAID3_DEBUG(1, "Device %s is still open, so it " "can't be definitely removed.", pp->name); break; } } g_topology_lock(); if (sc->sc_geom->softc == NULL) { g_topology_unlock(); return (0); } sc->sc_geom->softc = NULL; sc->sc_sync.ds_geom->softc = NULL; g_topology_unlock(); sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT; G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); sx_xunlock(&sc->sc_lock); mtx_lock(&sc->sc_queue_mtx); wakeup(sc); wakeup(&sc->sc_queue); mtx_unlock(&sc->sc_queue_mtx); G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); while (sc->sc_worker != NULL) tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5); G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); sx_xlock(&sc->sc_lock); g_raid3_destroy_device(sc); free(sc->sc_disks, M_RAID3); free(sc, M_RAID3); return (0); } static void g_raid3_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_raid3_metadata md; struct g_raid3_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_RAID3_DEBUG(2, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "raid3:taste"); /* This orphan function should be never called. */ gp->orphan = g_raid3_taste_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_raid3_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) return (NULL); if (g_raid3_debug >= 2) raid3_metadata_dump(&md); /* * Let's check if device already exists. 
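 * A tasted component joins an existing device only when both the
 * name and the id from its metadata match; the same name with a
 * different id is refused as already configured.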
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_sync.ds_geom == gp) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) { G_RAID3_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } break; } if (gp == NULL) { gp = g_raid3_create(mp, &md); if (gp == NULL) { G_RAID3_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; } G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); g_topology_unlock(); sx_xlock(&sc->sc_lock); error = g_raid3_add_disk(sc, pp, &md); if (error != 0) { G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) == sc->sc_ndisks) { g_cancel_event(sc); g_raid3_destroy(sc, G_RAID3_DESTROY_HARD); g_topology_lock(); return (NULL); } gp = NULL; } sx_xunlock(&sc->sc_lock); g_topology_lock(); return (gp); } static int g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_raid3_softc *sc; int error; g_topology_unlock(); sc = gp->softc; sx_xlock(&sc->sc_lock); g_cancel_event(sc); error = g_raid3_destroy(gp->softc, G_RAID3_DESTROY_SOFT); if (error != 0) sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } static void g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_raid3_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; /* Skip synchronization geom. */ if (gp == sc->sc_sync.ds_geom) return; if (pp != NULL) { /* Nothing here. */ } else if (cp != NULL) { struct g_raid3_disk *disk; disk = cp->private; if (disk == NULL) return; g_topology_unlock(); sx_xlock(&sc->sc_lock); sbuf_printf(sb, "%s", indent); if (disk->d_no == sc->sc_ndisks - 1) sbuf_cat(sb, "PARITY"); else sbuf_cat(sb, "DATA"); sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, (u_int)disk->d_no); if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { sbuf_printf(sb, "%s", indent); if (disk->d_sync.ds_offset == 0) sbuf_cat(sb, "0%"); else { sbuf_printf(sb, "%u%%", (u_int)((disk->d_sync.ds_offset * 100) / (sc->sc_mediasize / (sc->sc_ndisks - 1)))); } sbuf_cat(sb, "\n"); if (disk->d_sync.ds_offset > 0) { sbuf_printf(sb, "%s%jd" "\n", indent, (intmax_t)disk->d_sync.ds_offset); } } sbuf_printf(sb, "%s%u\n", indent, disk->d_sync.ds_syncid); sbuf_printf(sb, "%s%u\n", indent, disk->d_genid); sbuf_printf(sb, "%s", indent); if (disk->d_flags == 0) sbuf_cat(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if ((disk->d_flags & (flag)) != 0) { \ if (!first) \ sbuf_cat(sb, ", "); \ else \ first = 0; \ sbuf_cat(sb, name); \ } \ } while (0) ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY"); ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED"); ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING, "SYNCHRONIZING"); ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); ADD_FLAG(G_RAID3_DISK_FLAG_BROKEN, "BROKEN"); #undef ADD_FLAG } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s%s\n", indent, g_raid3_disk_state2str(disk->d_state)); sx_xunlock(&sc->sc_lock); g_topology_lock(); } else { g_topology_unlock(); sx_xlock(&sc->sc_lock); if (!g_raid3_use_malloc) { sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_4K].sz_requested); sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_4K].sz_failed); sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_16K].sz_requested); sbuf_printf(sb, "%s%u\n", indent, 
sc->sc_zones[G_RAID3_ZONE_16K].sz_failed); sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_64K].sz_requested); sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_64K].sz_failed); } sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s%u\n", indent, sc->sc_syncid); sbuf_printf(sb, "%s%u\n", indent, sc->sc_genid); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_cat(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if ((sc->sc_flags & (flag)) != 0) { \ if (!first) \ sbuf_cat(sb, ", "); \ else \ first = 0; \ sbuf_cat(sb, name); \ } \ } while (0) ADD_FLAG(G_RAID3_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, "ROUND-ROBIN"); ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY"); #undef ADD_FLAG } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, sc->sc_ndisks); sbuf_printf(sb, "%s%s\n", indent, g_raid3_device_state2str(sc->sc_state)); sx_xunlock(&sc->sc_lock); g_topology_lock(); } } static void g_raid3_shutdown_post_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_raid3_softc *sc; int error; mp = arg; g_topology_lock(); g_raid3_shutdown = 1; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if ((sc = gp->softc) == NULL) continue; /* Skip synchronization geom. */ if (gp == sc->sc_sync.ds_geom) continue; g_topology_unlock(); sx_xlock(&sc->sc_lock); g_raid3_idle(sc, -1); g_cancel_event(sc); error = g_raid3_destroy(sc, G_RAID3_DESTROY_DELAYED); if (error != 0) sx_xunlock(&sc->sc_lock); g_topology_lock(); } g_topology_unlock(); } static void g_raid3_init(struct g_class *mp) { g_raid3_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, g_raid3_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); if (g_raid3_post_sync == NULL) G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_raid3_fini(struct g_class *mp) { if (g_raid3_post_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_post_sync); } DECLARE_GEOM_CLASS(g_raid3_class, g_raid3); MODULE_VERSION(geom_raid3, 0); Index: head/sys/geom/raid3/g_raid3.h =================================================================== --- head/sys/geom/raid3/g_raid3.h (revision 350693) +++ head/sys/geom/raid3/g_raid3.h (revision 350694) @@ -1,478 +1,460 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_RAID3_H_ #define _G_RAID3_H_ #include #include #define G_RAID3_CLASS_NAME "RAID3" #define G_RAID3_MAGIC "GEOM::RAID3" /* * Version history: * 0 - Initial version number. * 1 - Added 'round-robin reading' algorithm. * 2 - Added 'verify reading' algorithm. * 3 - Added md_genid field to metadata. * 4 - Added md_provsize field to metadata. * 5 - Added 'no failure synchronization' flag. */ #define G_RAID3_VERSION 5 #define G_RAID3_DISK_FLAG_DIRTY 0x0000000000000001ULL #define G_RAID3_DISK_FLAG_SYNCHRONIZING 0x0000000000000002ULL #define G_RAID3_DISK_FLAG_FORCE_SYNC 0x0000000000000004ULL #define G_RAID3_DISK_FLAG_HARDCODED 0x0000000000000008ULL #define G_RAID3_DISK_FLAG_BROKEN 0x0000000000000010ULL #define G_RAID3_DISK_FLAG_MASK (G_RAID3_DISK_FLAG_DIRTY | \ G_RAID3_DISK_FLAG_SYNCHRONIZING | \ G_RAID3_DISK_FLAG_FORCE_SYNC) #define G_RAID3_DEVICE_FLAG_NOAUTOSYNC 0x0000000000000001ULL #define G_RAID3_DEVICE_FLAG_ROUND_ROBIN 0x0000000000000002ULL #define G_RAID3_DEVICE_FLAG_VERIFY 0x0000000000000004ULL #define G_RAID3_DEVICE_FLAG_NOFAILSYNC 0x0000000000000008ULL #define G_RAID3_DEVICE_FLAG_MASK (G_RAID3_DEVICE_FLAG_NOAUTOSYNC | \ G_RAID3_DEVICE_FLAG_ROUND_ROBIN | \ G_RAID3_DEVICE_FLAG_VERIFY | \ G_RAID3_DEVICE_FLAG_NOFAILSYNC) #ifdef _KERNEL extern u_int g_raid3_debug; -#define G_RAID3_DEBUG(lvl, ...) do { \ - if (g_raid3_debug >= (lvl)) { \ - printf("GEOM_RAID3"); \ - if (g_raid3_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_RAID3_LOGREQ(lvl, bp, ...) do { \ - if (g_raid3_debug >= (lvl)) { \ - printf("GEOM_RAID3"); \ - if (g_raid3_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_RAID3_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_RAID3", g_raid3_debug, (lvl), NULL, __VA_ARGS__) +#define G_RAID3_LOGREQ(lvl, bp, ...) \ + _GEOM_DEBUG("GEOM_RAID3", g_raid3_debug, (lvl), (bp), __VA_ARGS__) #define G_RAID3_BIO_CFLAG_REGULAR 0x01 #define G_RAID3_BIO_CFLAG_SYNC 0x02 #define G_RAID3_BIO_CFLAG_PARITY 0x04 #define G_RAID3_BIO_CFLAG_NODISK 0x08 #define G_RAID3_BIO_CFLAG_REGSYNC 0x10 #define G_RAID3_BIO_CFLAG_MASK (G_RAID3_BIO_CFLAG_REGULAR | \ G_RAID3_BIO_CFLAG_SYNC | \ G_RAID3_BIO_CFLAG_PARITY | \ G_RAID3_BIO_CFLAG_NODISK | \ G_RAID3_BIO_CFLAG_REGSYNC) #define G_RAID3_BIO_PFLAG_DEGRADED 0x01 #define G_RAID3_BIO_PFLAG_NOPARITY 0x02 #define G_RAID3_BIO_PFLAG_VERIFY 0x04 #define G_RAID3_BIO_PFLAG_MASK (G_RAID3_BIO_PFLAG_DEGRADED | \ G_RAID3_BIO_PFLAG_NOPARITY | \ G_RAID3_BIO_PFLAG_VERIFY) /* * Informations needed for synchronization. */ struct g_raid3_disk_sync { struct g_consumer *ds_consumer; /* Consumer connected to our device. */ off_t ds_offset; /* Offset of next request to send. */ off_t ds_offset_done; /* Offset of already synchronized region. */ off_t ds_resync; /* Resynchronize from this offset. */ u_int ds_syncid; /* Disk's synchronization ID. 
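 * All components of a consistent array share the same value;
 * a component with an older (smaller) syncid has to be
 * resynchronized before it can become ACTIVE.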
*/ u_int ds_inflight; /* Number of in-flight sync requests. */ struct bio **ds_bios; /* BIOs for synchronization I/O. */ }; /* * Informations needed for synchronization. */ struct g_raid3_device_sync { struct g_geom *ds_geom; /* Synchronization geom. */ }; #define G_RAID3_DISK_STATE_NODISK 0 #define G_RAID3_DISK_STATE_NONE 1 #define G_RAID3_DISK_STATE_NEW 2 #define G_RAID3_DISK_STATE_ACTIVE 3 #define G_RAID3_DISK_STATE_STALE 4 #define G_RAID3_DISK_STATE_SYNCHRONIZING 5 #define G_RAID3_DISK_STATE_DISCONNECTED 6 #define G_RAID3_DISK_STATE_DESTROY 7 struct g_raid3_disk { u_int d_no; /* Disk number. */ struct g_consumer *d_consumer; /* Consumer. */ struct g_raid3_softc *d_softc; /* Back-pointer to softc. */ int d_state; /* Disk state. */ uint64_t d_flags; /* Additional flags. */ u_int d_genid; /* Disk's generation ID. */ struct g_raid3_disk_sync d_sync; /* Sync information. */ LIST_ENTRY(g_raid3_disk) d_next; }; #define d_name d_consumer->provider->name #define G_RAID3_EVENT_DONTWAIT 0x1 #define G_RAID3_EVENT_WAIT 0x2 #define G_RAID3_EVENT_DEVICE 0x4 #define G_RAID3_EVENT_DONE 0x8 struct g_raid3_event { struct g_raid3_disk *e_disk; int e_state; int e_flags; int e_error; TAILQ_ENTRY(g_raid3_event) e_next; }; #define G_RAID3_DEVICE_FLAG_DESTROY 0x0100000000000000ULL #define G_RAID3_DEVICE_FLAG_WAIT 0x0200000000000000ULL #define G_RAID3_DEVICE_FLAG_DESTROYING 0x0400000000000000ULL #define G_RAID3_DEVICE_STATE_STARTING 0 #define G_RAID3_DEVICE_STATE_DEGRADED 1 #define G_RAID3_DEVICE_STATE_COMPLETE 2 /* Bump syncid on first write. */ #define G_RAID3_BUMP_SYNCID 0x1 /* Bump genid immediately. */ #define G_RAID3_BUMP_GENID 0x2 enum g_raid3_zones { G_RAID3_ZONE_64K, G_RAID3_ZONE_16K, G_RAID3_ZONE_4K, G_RAID3_NUM_ZONES }; static __inline enum g_raid3_zones g_raid3_zone(size_t nbytes) { if (nbytes > 65536) return (G_RAID3_NUM_ZONES); else if (nbytes > 16384) return (G_RAID3_ZONE_64K); else if (nbytes > 4096) return (G_RAID3_ZONE_16K); else return (G_RAID3_ZONE_4K); }; struct g_raid3_softc { u_int sc_state; /* Device state. */ uint64_t sc_mediasize; /* Device size. */ uint32_t sc_sectorsize; /* Sector size. */ uint64_t sc_flags; /* Additional flags. */ struct g_geom *sc_geom; struct g_provider *sc_provider; uint32_t sc_id; /* Device unique ID. */ struct sx sc_lock; struct bio_queue_head sc_queue; struct mtx sc_queue_mtx; struct proc *sc_worker; struct bio_queue_head sc_regular_delayed; /* Delayed I/O requests due collision with sync requests. */ struct bio_queue_head sc_inflight; /* In-flight regular write requests. */ struct bio_queue_head sc_sync_delayed; /* Delayed sync requests due collision with regular requests. */ struct g_raid3_disk *sc_disks; u_int sc_ndisks; /* Number of disks. */ u_int sc_round_robin; struct g_raid3_disk *sc_syncdisk; struct g_raid3_zone { uma_zone_t sz_zone; size_t sz_inuse; size_t sz_max; u_int sz_requested; u_int sz_failed; } sc_zones[G_RAID3_NUM_ZONES]; u_int sc_genid; /* Generation ID. */ u_int sc_syncid; /* Synchronization ID. */ int sc_bump_id; struct g_raid3_device_sync sc_sync; int sc_idle; /* DIRTY flags removed. 
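 * Non-zero while the device is idle (no recent writes), in
 * which case the per-component DIRTY flags may be cleared.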
*/ time_t sc_last_write; u_int sc_writes; TAILQ_HEAD(, g_raid3_event) sc_events; struct mtx sc_events_mtx; struct callout sc_callout; struct root_hold_token *sc_rootmount; }; #define sc_name sc_geom->name const char *g_raid3_get_diskname(struct g_raid3_disk *disk); u_int g_raid3_ndisks(struct g_raid3_softc *sc, int state); #define G_RAID3_DESTROY_SOFT 0 #define G_RAID3_DESTROY_DELAYED 1 #define G_RAID3_DESTROY_HARD 2 int g_raid3_destroy(struct g_raid3_softc *sc, int how); int g_raid3_event_send(void *arg, int state, int flags); struct g_raid3_metadata; int g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp, struct g_raid3_metadata *md); int g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md); void g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md); int g_raid3_clear_metadata(struct g_raid3_disk *disk); void g_raid3_update_metadata(struct g_raid3_disk *disk); g_ctl_req_t g_raid3_config; #endif /* _KERNEL */ struct g_raid3_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ char md_name[16]; /* Device name. */ uint32_t md_id; /* Device unique ID. */ uint16_t md_no; /* Component number. */ uint16_t md_all; /* Number of disks in device. */ uint32_t md_genid; /* Generation ID. */ uint32_t md_syncid; /* Synchronization ID. */ uint64_t md_mediasize; /* Size of whole device. */ uint32_t md_sectorsize; /* Sector size. */ uint64_t md_sync_offset; /* Synchronized offset. */ uint64_t md_mflags; /* Additional device flags. */ uint64_t md_dflags; /* Additional disk flags. */ char md_provider[16]; /* Hardcoded provider. */ uint64_t md_provsize; /* Provider's size. */ u_char md_hash[16]; /* MD5 hash. */ }; static __inline void raid3_metadata_encode(struct g_raid3_metadata *md, u_char *data) { MD5_CTX ctx; bcopy(md->md_magic, data, 16); le32enc(data + 16, md->md_version); bcopy(md->md_name, data + 20, 16); le32enc(data + 36, md->md_id); le16enc(data + 40, md->md_no); le16enc(data + 42, md->md_all); le32enc(data + 44, md->md_genid); le32enc(data + 48, md->md_syncid); le64enc(data + 52, md->md_mediasize); le32enc(data + 60, md->md_sectorsize); le64enc(data + 64, md->md_sync_offset); le64enc(data + 72, md->md_mflags); le64enc(data + 80, md->md_dflags); bcopy(md->md_provider, data + 88, 16); le64enc(data + 104, md->md_provsize); MD5Init(&ctx); MD5Update(&ctx, data, 112); MD5Final(md->md_hash, &ctx); bcopy(md->md_hash, data + 112, 16); } static __inline int raid3_metadata_decode_v0v1v2(const u_char *data, struct g_raid3_metadata *md) { MD5_CTX ctx; bcopy(data + 20, md->md_name, 16); md->md_id = le32dec(data + 36); md->md_no = le16dec(data + 40); md->md_all = le16dec(data + 42); md->md_syncid = le32dec(data + 44); md->md_mediasize = le64dec(data + 48); md->md_sectorsize = le32dec(data + 56); md->md_sync_offset = le64dec(data + 60); md->md_mflags = le64dec(data + 68); md->md_dflags = le64dec(data + 76); bcopy(data + 84, md->md_provider, 16); bcopy(data + 100, md->md_hash, 16); MD5Init(&ctx); MD5Update(&ctx, data, 100); MD5Final(md->md_hash, &ctx); if (bcmp(md->md_hash, data + 100, 16) != 0) return (EINVAL); /* New fields. 
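 * These fields do not exist in the v0-v2 on-disk layout
 * (genid appeared in version 3, provsize in version 4), so
 * give them neutral defaults here.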
*/ md->md_genid = 0; md->md_provsize = 0; return (0); } static __inline int raid3_metadata_decode_v3(const u_char *data, struct g_raid3_metadata *md) { MD5_CTX ctx; bcopy(data + 20, md->md_name, 16); md->md_id = le32dec(data + 36); md->md_no = le16dec(data + 40); md->md_all = le16dec(data + 42); md->md_genid = le32dec(data + 44); md->md_syncid = le32dec(data + 48); md->md_mediasize = le64dec(data + 52); md->md_sectorsize = le32dec(data + 60); md->md_sync_offset = le64dec(data + 64); md->md_mflags = le64dec(data + 72); md->md_dflags = le64dec(data + 80); bcopy(data + 88, md->md_provider, 16); bcopy(data + 104, md->md_hash, 16); MD5Init(&ctx); MD5Update(&ctx, data, 104); MD5Final(md->md_hash, &ctx); if (bcmp(md->md_hash, data + 104, 16) != 0) return (EINVAL); /* New fields. */ md->md_provsize = 0; return (0); } static __inline int raid3_metadata_decode_v4v5(const u_char *data, struct g_raid3_metadata *md) { MD5_CTX ctx; bcopy(data + 20, md->md_name, 16); md->md_id = le32dec(data + 36); md->md_no = le16dec(data + 40); md->md_all = le16dec(data + 42); md->md_genid = le32dec(data + 44); md->md_syncid = le32dec(data + 48); md->md_mediasize = le64dec(data + 52); md->md_sectorsize = le32dec(data + 60); md->md_sync_offset = le64dec(data + 64); md->md_mflags = le64dec(data + 72); md->md_dflags = le64dec(data + 80); bcopy(data + 88, md->md_provider, 16); md->md_provsize = le64dec(data + 104); bcopy(data + 112, md->md_hash, 16); MD5Init(&ctx); MD5Update(&ctx, data, 112); MD5Final(md->md_hash, &ctx); if (bcmp(md->md_hash, data + 112, 16) != 0) return (EINVAL); return (0); } static __inline int raid3_metadata_decode(const u_char *data, struct g_raid3_metadata *md) { int error; bcopy(data, md->md_magic, 16); md->md_version = le32dec(data + 16); switch (md->md_version) { case 0: case 1: case 2: error = raid3_metadata_decode_v0v1v2(data, md); break; case 3: error = raid3_metadata_decode_v3(data, md); break; case 4: case 5: error = raid3_metadata_decode_v4v5(data, md); break; default: error = EINVAL; break; } return (error); } static __inline void raid3_metadata_dump(const struct g_raid3_metadata *md) { static const char hex[] = "0123456789abcdef"; char hash[16 * 2 + 1]; u_int i; printf(" magic: %s\n", md->md_magic); printf(" version: %u\n", (u_int)md->md_version); printf(" name: %s\n", md->md_name); printf(" id: %u\n", (u_int)md->md_id); printf(" no: %u\n", (u_int)md->md_no); printf(" all: %u\n", (u_int)md->md_all); printf(" genid: %u\n", (u_int)md->md_genid); printf(" syncid: %u\n", (u_int)md->md_syncid); printf(" mediasize: %jd\n", (intmax_t)md->md_mediasize); printf("sectorsize: %u\n", (u_int)md->md_sectorsize); printf("syncoffset: %jd\n", (intmax_t)md->md_sync_offset); printf(" mflags:"); if (md->md_mflags == 0) printf(" NONE"); else { if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0) printf(" NOAUTOSYNC"); if ((md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) printf(" ROUND-ROBIN"); if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0) printf(" VERIFY"); if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) printf(" NOFAILSYNC"); } printf("\n"); printf(" dflags:"); if (md->md_dflags == 0) printf(" NONE"); else { if ((md->md_dflags & G_RAID3_DISK_FLAG_DIRTY) != 0) printf(" DIRTY"); if ((md->md_dflags & G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) printf(" SYNCHRONIZING"); if ((md->md_dflags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) printf(" FORCE_SYNC"); } printf("\n"); printf("hcprovider: %s\n", md->md_provider); printf(" provsize: %ju\n", (uintmax_t)md->md_provsize); bzero(hash, 
sizeof(hash)); for (i = 0; i < 16; i++) { hash[i * 2] = hex[md->md_hash[i] >> 4]; hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f]; } printf(" MD5 hash: %s\n", hash); } #endif /* !_G_RAID3_H_ */ Index: head/sys/geom/sched/g_sched.c =================================================================== --- head/sys/geom/sched/g_sched.c (revision 350693) +++ head/sys/geom/sched/g_sched.c (revision 350694) @@ -1,1728 +1,1729 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2010 Fabio Checconi * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $Id$ * $FreeBSD$ * * Main control module for geom-based disk schedulers ('sched'). * * USER VIEW * A 'sched' node is typically inserted transparently between * an existing provider pp and its original geom gp * * [pp --> gp ..] * * using the command "geom sched insert " and * resulting in the following topology * * [pp --> sched_gp --> cp] [new_pp --> gp ... ] * * Deletion "geom sched destroy .sched." restores the * original chain. The normal "geom sched create " * is also supported. * * INTERNALS * Internally, the 'sched' uses the following data structures * * geom{} g_sched_softc{} g_gsched{} * +----------+ +---------------+ +-------------+ * | softc *-|--->| sc_gsched *-|-->| gs_init | * | ... | | | | gs_fini | * | | | [ hash table] | | gs_start | * +----------+ | | | ... | * | | +-------------+ * | | * | | g_*_softc{} * | | +-------------+ * | sc_data *-|-->| | * +---------------+ | algorithm- | * | specific | * +-------------+ * * A g_sched_softc{} is created with a "geom sched insert" call. * In turn this instantiates a specific scheduling algorithm, * which sets sc_gsched to point to the algorithm callbacks, * and calls gs_init() to create the g_*_softc{} . * The other callbacks (gs_start, gs_next, ...) 
are invoked * as needed * * g_sched_softc{} is defined in g_sched.h and mostly used here; * g_gsched{}, and the gs_callbacks, are documented in gs_scheduler.h; * g_*_softc{} is defined/implemented by each algorithm (gs_*.c) * * DATA MOVING * When a bio is received on the provider, it goes to the * g_sched_start() which calls gs_start() to initially queue it; * then we call g_sched_dispatch() that loops around gs_next() * to select zero or more bio's to be sent downstream. * * g_sched_dispatch() can also be called as a result of a timeout, * e.g. when doing anticipation or pacing requests. * * When a bio comes back, it goes to g_sched_done() which in turn * calls gs_done(). The latter does any necessary housekeeping in * the scheduling algorithm, and may decide to call g_sched_dispatch() * to send more bio's downstream. * * If an algorithm needs per-flow queues, these are created * calling gs_init_class() and destroyed with gs_fini_class(), * and they are also inserted in the hash table implemented in * the g_sched_softc{} * * If an algorithm is replaced, or a transparently-inserted node is * removed with "geom sched destroy", we need to remove all references * to the g_*_softc{} and g_sched_softc from the bio's still in * the scheduler. g_sched_forced_dispatch() helps doing this. * XXX need to explain better. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* we access curthread */ #include +#include #include "gs_scheduler.h" #include "g_sched.h" /* geom hooks */ /* * Size of the per-geom hash table storing traffic classes. * We may decide to change it at a later time, it has no ABI * implications as it is only used for run-time allocations. */ #define G_SCHED_HASH_SIZE 32 static int g_sched_destroy(struct g_geom *gp, boolean_t force); static int g_sched_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_sched_config(struct gctl_req *req, struct g_class *mp, const char *verb); static struct g_geom *g_sched_taste(struct g_class *mp, struct g_provider *pp, int flags __unused); static void g_sched_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static void g_sched_init(struct g_class *mp); static void g_sched_fini(struct g_class *mp); static int g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td); struct g_class g_sched_class = { .name = G_SCHED_CLASS_NAME, .version = G_VERSION, .ctlreq = g_sched_config, .taste = g_sched_taste, .destroy_geom = g_sched_destroy_geom, .init = g_sched_init, .ioctl = g_sched_ioctl, .fini = g_sched_fini }; MALLOC_DEFINE(M_GEOM_SCHED, "GEOM_SCHED", "Geom schedulers data structures"); /* * Global variables describing the state of the geom_sched module. * There is only one static instance of this structure. */ LIST_HEAD(gs_list, g_gsched); /* type, link field */ struct geom_sched_vars { struct mtx gs_mtx; struct gs_list gs_scheds; /* list of algorithms */ u_int gs_debug; u_int gs_sched_count; /* how many algorithms ? */ u_int gs_patched; /* g_io_request was patched */ u_int gs_initialized; u_int gs_expire_secs; /* expiration of hash entries */ struct bio_queue_head gs_pending; u_int gs_npending; /* The following are for stats, usually protected by gs_mtx. 
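To make the callback contract described in the big comment at the top of this file more concrete, here is a hedged sketch of a trivial single-queue algorithm and the struct g_gsched descriptor it would hand to the framework. The field names and prototypes are inferred from the call sites in this file (the authoritative declarations live in gs_scheduler.h, which is not part of this diff), and the "fifo" algorithm itself is hypothetical; a real module would normally declare its own malloc type and live in its own gs_*.c file.

/*
 * Hypothetical "fifo" scheduler: one global queue, no per-flow state.
 * Assumes <sys/param.h>, <sys/bio.h>, <sys/malloc.h> and "gs_scheduler.h".
 */
struct g_fifo_softc {
	struct bio_queue_head fq_queue;		/* pending bios, FIFO order */
};

static void *
g_fifo_init(struct g_geom *gp __unused)
{
	struct g_fifo_softc *sc;

	/* Returning NULL makes g_sched_create()/change_algo() fail with ENOMEM. */
	sc = malloc(sizeof(*sc), M_GEOM_SCHED, M_NOWAIT | M_ZERO);
	if (sc != NULL)
		bioq_init(&sc->fq_queue);
	return (sc);
}

static void
g_fifo_fini(void *data)
{

	free(data, M_GEOM_SCHED);
}

static int
g_fifo_start(void *data, struct bio *bp)
{
	struct g_fifo_softc *sc = data;

	bioq_insert_tail(&sc->fq_queue, bp);
	return (0);		/* non-zero would make g_sched_start() bypass us */
}

static struct bio *
g_fifo_next(void *data, int force __unused)
{
	struct g_fifo_softc *sc = data;

	/* A FIFO never holds requests back, so 'force' is irrelevant. */
	return (bioq_takefirst(&sc->fq_queue));	/* NULL ends the dispatch loop */
}

static void
g_fifo_done(void *data __unused, struct bio *bp __unused)
{

	/* Nothing to age or account for in a plain FIFO. */
}

static struct g_gsched g_fifo_gsched = {
	.gs_name = "fifo",
	.gs_priv_size = 0,	/* no per-flow classes, so no hash table */
	.gs_init = g_fifo_init,
	.gs_fini = g_fifo_fini,
	.gs_start = g_fifo_start,
	.gs_next = g_fifo_next,
	.gs_done = g_fifo_done,
};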
*/ u_long gs_requests; /* total requests */ u_long gs_done; /* total done */ u_int gs_in_flight; /* requests in flight */ u_int gs_writes_in_flight; u_int gs_bytes_in_flight; u_int gs_write_bytes_in_flight; char gs_names[256]; /* names of schedulers */ }; static struct geom_sched_vars me = { .gs_expire_secs = 10, }; SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, sched, CTLFLAG_RW, 0, "GEOM_SCHED stuff"); SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_wb, CTLFLAG_RD, &me.gs_write_bytes_in_flight, 0, "Write bytes in flight"); SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_b, CTLFLAG_RD, &me.gs_bytes_in_flight, 0, "Bytes in flight"); SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_w, CTLFLAG_RD, &me.gs_writes_in_flight, 0, "Write Requests in flight"); SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight, CTLFLAG_RD, &me.gs_in_flight, 0, "Requests in flight"); SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, done, CTLFLAG_RD, &me.gs_done, 0, "Total done"); SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, requests, CTLFLAG_RD, &me.gs_requests, 0, "Total requests"); SYSCTL_STRING(_kern_geom_sched, OID_AUTO, algorithms, CTLFLAG_RD, &me.gs_names, 0, "Algorithm names"); SYSCTL_UINT(_kern_geom_sched, OID_AUTO, alg_count, CTLFLAG_RD, &me.gs_sched_count, 0, "Number of algorithms"); SYSCTL_UINT(_kern_geom_sched, OID_AUTO, debug, CTLFLAG_RW, &me.gs_debug, 0, "Debug level"); SYSCTL_UINT(_kern_geom_sched, OID_AUTO, expire_secs, CTLFLAG_RW, &me.gs_expire_secs, 0, "Expire time in seconds"); /* * g_sched calls the scheduler algorithms with this lock held. * The locking functions are exposed so the scheduler algorithms can also * protect themselves e.g. when running a callout handler. */ void g_sched_lock(struct g_geom *gp) { struct g_sched_softc *sc = gp->softc; mtx_lock(&sc->sc_mtx); } void g_sched_unlock(struct g_geom *gp) { struct g_sched_softc *sc = gp->softc; mtx_unlock(&sc->sc_mtx); } /* * Support functions to handle references to the module, * which are coming from devices using this scheduler. */ static inline void g_gsched_ref(struct g_gsched *gsp) { atomic_add_int(&gsp->gs_refs, 1); } static inline void g_gsched_unref(struct g_gsched *gsp) { atomic_add_int(&gsp->gs_refs, -1); } /* * Update the stats when this request is done. */ static void g_sched_update_stats(struct bio *bio) { me.gs_done++; me.gs_in_flight--; me.gs_bytes_in_flight -= bio->bio_length; if (bio->bio_cmd == BIO_WRITE) { me.gs_writes_in_flight--; me.gs_write_bytes_in_flight -= bio->bio_length; } } /* * Dispatch any pending request. */ static void g_sched_forced_dispatch(struct g_geom *gp) { struct g_sched_softc *sc = gp->softc; struct g_gsched *gsp = sc->sc_gsched; struct bio *bp; KASSERT(mtx_owned(&sc->sc_mtx), ("sc_mtx not owned during forced dispatch")); while ((bp = gsp->gs_next(sc->sc_data, 1)) != NULL) g_io_request(bp, LIST_FIRST(&gp->consumer)); } /* * The main dispatch loop, called either here after the start * routine, or by scheduling algorithms when they receive a timeout * or a 'done' notification. Does not share code with the forced * dispatch path, since the gs_done() callback can call us. 
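As noted above, g_sched_dispatch() may also be driven by a timeout, for example when an algorithm paces or anticipates requests. One plausible way for an algorithm to arrange that is a callout whose handler takes the same per-geom lock the framework holds when calling into the algorithm. Only g_sched_lock(), g_sched_unlock() and g_sched_dispatch() below are taken from this file; the softc layout and the pacing period are made up for the sketch.

/*
 * Hypothetical pacing timer.  The callout is assumed to be set up with
 * callout_init() in gs_init() and stopped with callout_drain() in gs_fini().
 */
struct g_myalgo_softc {
	struct g_geom	*ma_geom;	/* remembered from gs_init(gp) */
	struct callout	 ma_pace;
};

static void
g_myalgo_pace_timeout(void *arg)
{
	struct g_myalgo_softc *sc = arg;

	g_sched_lock(sc->ma_geom);
	g_sched_dispatch(sc->ma_geom);	/* push out whatever is now eligible */
	g_sched_unlock(sc->ma_geom);
}

/* Called e.g. from gs_done() to release held-back requests a bit later. */
static void
g_myalgo_pace(struct g_myalgo_softc *sc)
{

	callout_reset(&sc->ma_pace, hz / 200, g_myalgo_pace_timeout, sc);
}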
*/ void g_sched_dispatch(struct g_geom *gp) { struct g_sched_softc *sc = gp->softc; struct g_gsched *gsp = sc->sc_gsched; struct bio *bp; KASSERT(mtx_owned(&sc->sc_mtx), ("sc_mtx not owned during dispatch")); if ((sc->sc_flags & G_SCHED_FLUSHING)) return; while ((bp = gsp->gs_next(sc->sc_data, 0)) != NULL) g_io_request(bp, LIST_FIRST(&gp->consumer)); } /* * Recent (8.0 and above) versions of FreeBSD have support to * register classifiers of disk requests. The classifier is * invoked by g_io_request(), and stores the information into * bp->bio_classifier1. * * Support for older versions, which is left here only for * documentation purposes, relies on two hacks: * 1. classification info is written into the bio_caller1 * field of the topmost node in the bio chain. This field * is rarely used, but this module is incompatible with * those that use bio_caller1 for other purposes, * such as ZFS and gjournal; * 2. g_io_request() is patched in-memory when the module is * loaded, so that the function calls a classifier as its * first thing. g_io_request() is restored when the module * is unloaded. This functionality is only supported for * x86 and amd64, other architectures need source code changes. */ /* * Lookup the identity of the issuer of the original request. * In the current implementation we use the curthread of the * issuer, but different mechanisms may be implemented later * so we do not make assumptions on the return value which for * us is just an opaque identifier. */ static inline u_long g_sched_classify(struct bio *bp) { /* we have classifier fields in the struct bio */ return ((u_long)bp->bio_classifier1); } /* Return the hash chain for the given key. */ static inline struct g_hash * g_sched_hash(struct g_sched_softc *sc, u_long key) { return (&sc->sc_hash[key & sc->sc_mask]); } /* * Helper function for the children classes, which takes * a geom and a bio and returns the private descriptor * associated to the request. This involves fetching * the classification field and [al]locating the * corresponding entry in the hash table. */ void * g_sched_get_class(struct g_geom *gp, struct bio *bp) { struct g_sched_softc *sc; struct g_sched_class *gsc; struct g_gsched *gsp; struct g_hash *bucket; u_long key; sc = gp->softc; key = g_sched_classify(bp); bucket = g_sched_hash(sc, key); LIST_FOREACH(gsc, bucket, gsc_clist) { if (key == gsc->gsc_key) { gsc->gsc_refs++; return (gsc->gsc_priv); } } gsp = sc->sc_gsched; gsc = malloc(sizeof(*gsc) + gsp->gs_priv_size, M_GEOM_SCHED, M_NOWAIT | M_ZERO); if (!gsc) return (NULL); if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv)) { free(gsc, M_GEOM_SCHED); return (NULL); } gsc->gsc_refs = 2; /* 1 for the hash table, 1 for the caller. 
*/ gsc->gsc_key = key; LIST_INSERT_HEAD(bucket, gsc, gsc_clist); gsc->gsc_expire = ticks + me.gs_expire_secs * hz; return (gsc->gsc_priv); } /* * Release a reference to the per-client descriptor, */ void g_sched_put_class(struct g_geom *gp, void *priv) { struct g_sched_class *gsc; struct g_sched_softc *sc; gsc = g_sched_priv2class(priv); gsc->gsc_expire = ticks + me.gs_expire_secs * hz; if (--gsc->gsc_refs > 0) return; sc = gp->softc; sc->sc_gsched->gs_fini_class(sc->sc_data, priv); LIST_REMOVE(gsc, gsc_clist); free(gsc, M_GEOM_SCHED); } static void g_sched_hash_fini(struct g_geom *gp, struct g_hash *hp, u_long mask, struct g_gsched *gsp, void *data) { struct g_sched_class *cp, *cp2; int i; if (!hp) return; if (data && gsp->gs_hash_unref) gsp->gs_hash_unref(data); for (i = 0; i < G_SCHED_HASH_SIZE; i++) { LIST_FOREACH_SAFE(cp, &hp[i], gsc_clist, cp2) g_sched_put_class(gp, cp->gsc_priv); } hashdestroy(hp, M_GEOM_SCHED, mask); } static struct g_hash * g_sched_hash_init(struct g_gsched *gsp, u_long *mask, int flags) { struct g_hash *hash; if (gsp->gs_priv_size == 0) return (NULL); hash = hashinit_flags(G_SCHED_HASH_SIZE, M_GEOM_SCHED, mask, flags); return (hash); } static void g_sched_flush_classes(struct g_geom *gp) { struct g_sched_softc *sc; struct g_sched_class *cp, *cp2; int i; sc = gp->softc; if (!sc->sc_hash || ticks - sc->sc_flush_ticks <= 0) return; for (i = 0; i < G_SCHED_HASH_SIZE; i++) { LIST_FOREACH_SAFE(cp, &sc->sc_hash[i], gsc_clist, cp2) { if (cp->gsc_refs == 1 && ticks - cp->gsc_expire > 0) g_sched_put_class(gp, cp->gsc_priv); } } sc->sc_flush_ticks = ticks + me.gs_expire_secs * hz; } /* * Wait for the completion of any outstanding request. To ensure * that this does not take forever the caller has to make sure that * no new request enter the scehduler before calling us. * * Must be called with the gp mutex held and topology locked. */ static int g_sched_wait_pending(struct g_geom *gp) { struct g_sched_softc *sc = gp->softc; int endticks = ticks + hz; g_topology_assert(); while (sc->sc_pending && endticks - ticks >= 0) msleep(gp, &sc->sc_mtx, 0, "sched_wait_pending", hz / 4); return (sc->sc_pending ? ETIMEDOUT : 0); } static int g_sched_remove_locked(struct g_geom *gp, struct g_gsched *gsp) { struct g_sched_softc *sc = gp->softc; int error; /* Set the flushing flag: new bios will not enter the scheduler. */ sc->sc_flags |= G_SCHED_FLUSHING; g_sched_forced_dispatch(gp); error = g_sched_wait_pending(gp); if (error) goto failed; /* No more requests pending or in flight from the old gsp. */ g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data); sc->sc_hash = NULL; /* * Avoid deadlock here by releasing the gp mutex and reacquiring * it once done. It should be safe, since no reconfiguration or * destruction can take place due to the geom topology lock; no * new request can use the current sc_data since we flagged the * geom as being flushed. */ g_sched_unlock(gp); gsp->gs_fini(sc->sc_data); g_sched_lock(gp); sc->sc_gsched = NULL; sc->sc_data = NULL; g_gsched_unref(gsp); failed: sc->sc_flags &= ~G_SCHED_FLUSHING; return (error); } static int g_sched_remove(struct g_geom *gp, struct g_gsched *gsp) { int error; g_sched_lock(gp); error = g_sched_remove_locked(gp, gsp); /* gsp is surely non-null */ g_sched_unlock(gp); return (error); } /* * Support function for create/taste -- locate the desired * algorithm and grab a reference to it. 
*/ static struct g_gsched * g_gsched_find(const char *name) { struct g_gsched *gsp = NULL; mtx_lock(&me.gs_mtx); LIST_FOREACH(gsp, &me.gs_scheds, glist) { if (strcmp(name, gsp->gs_name) == 0) { g_gsched_ref(gsp); break; } } mtx_unlock(&me.gs_mtx); return (gsp); } /* * Rebuild the list of scheduler names. * To be called with me.gs_mtx lock held. */ static void g_gsched_build_names(struct g_gsched *gsp) { int pos, l; struct g_gsched *cur; pos = 0; LIST_FOREACH(cur, &me.gs_scheds, glist) { l = strlen(cur->gs_name); if (l + pos + 1 + 1 < sizeof(me.gs_names)) { if (pos != 0) me.gs_names[pos++] = ' '; strcpy(me.gs_names + pos, cur->gs_name); pos += l; } } me.gs_names[pos] = '\0'; } /* * Register or unregister individual scheduling algorithms. */ static int g_gsched_register(struct g_gsched *gsp) { struct g_gsched *cur; int error = 0; mtx_lock(&me.gs_mtx); LIST_FOREACH(cur, &me.gs_scheds, glist) { if (strcmp(gsp->gs_name, cur->gs_name) == 0) break; } if (cur != NULL) { G_SCHED_DEBUG(0, "A scheduler named %s already" "exists.", gsp->gs_name); error = EEXIST; } else { LIST_INSERT_HEAD(&me.gs_scheds, gsp, glist); gsp->gs_refs = 1; me.gs_sched_count++; g_gsched_build_names(gsp); } mtx_unlock(&me.gs_mtx); return (error); } struct g_gsched_unregparm { struct g_gsched *gup_gsp; int gup_error; }; static void g_gsched_unregister(void *arg, int flag) { struct g_gsched_unregparm *parm = arg; struct g_gsched *gsp = parm->gup_gsp, *cur, *tmp; struct g_sched_softc *sc; struct g_geom *gp, *gp_tmp; int error; parm->gup_error = 0; g_topology_assert(); if (flag == EV_CANCEL) return; mtx_lock(&me.gs_mtx); LIST_FOREACH_SAFE(gp, &g_sched_class.geom, geom, gp_tmp) { if (gp->class != &g_sched_class) continue; /* Should not happen. */ sc = gp->softc; if (sc->sc_gsched == gsp) { error = g_sched_remove(gp, gsp); if (error) goto failed; } } LIST_FOREACH_SAFE(cur, &me.gs_scheds, glist, tmp) { if (cur != gsp) continue; if (gsp->gs_refs != 1) { G_SCHED_DEBUG(0, "%s still in use.", gsp->gs_name); parm->gup_error = EBUSY; } else { LIST_REMOVE(gsp, glist); me.gs_sched_count--; g_gsched_build_names(gsp); } break; } if (cur == NULL) { G_SCHED_DEBUG(0, "%s not registered.", gsp->gs_name); parm->gup_error = ENOENT; } failed: mtx_unlock(&me.gs_mtx); } static inline void g_gsched_global_init(void) { if (!me.gs_initialized) { G_SCHED_DEBUG(0, "Initializing global data."); mtx_init(&me.gs_mtx, "gsched", NULL, MTX_DEF); LIST_INIT(&me.gs_scheds); bioq_init(&me.gs_pending); me.gs_initialized = 1; } } /* * Module event called when a scheduling algorithm module is loaded or * unloaded. */ int g_gsched_modevent(module_t mod, int cmd, void *arg) { struct g_gsched *gsp = arg; struct g_gsched_unregparm parm; int error; G_SCHED_DEBUG(0, "Modevent %d.", cmd); /* * If the module is loaded at boot, the geom thread that calls * g_sched_init() might actually run after g_gsched_modevent(), * so make sure that the module is properly initialized. 
*/ g_gsched_global_init(); error = EOPNOTSUPP; switch (cmd) { case MOD_LOAD: error = g_gsched_register(gsp); G_SCHED_DEBUG(0, "Loaded module %s error %d.", gsp->gs_name, error); if (error == 0) g_retaste(&g_sched_class); break; case MOD_UNLOAD: parm.gup_gsp = gsp; parm.gup_error = 0; error = g_waitfor_event(g_gsched_unregister, &parm, M_WAITOK, NULL); if (error == 0) error = parm.gup_error; G_SCHED_DEBUG(0, "Unloaded module %s error %d.", gsp->gs_name, error); break; } return (error); } #ifdef KTR #define TRC_BIO_EVENT(e, bp) g_sched_trace_bio_ ## e (bp) static inline char g_sched_type(struct bio *bp) { if (bp->bio_cmd == BIO_READ) return ('R'); else if (bp->bio_cmd == BIO_WRITE) return ('W'); return ('U'); } static inline void g_sched_trace_bio_START(struct bio *bp) { CTR5(KTR_GSCHED, "S %lu %c %lu/%lu %lu", g_sched_classify(bp), g_sched_type(bp), bp->bio_offset / ULONG_MAX, bp->bio_offset, bp->bio_length); } static inline void g_sched_trace_bio_DONE(struct bio *bp) { CTR5(KTR_GSCHED, "D %lu %c %lu/%lu %lu", g_sched_classify(bp), g_sched_type(bp), bp->bio_offset / ULONG_MAX, bp->bio_offset, bp->bio_length); } #else /* !KTR */ #define TRC_BIO_EVENT(e, bp) #endif /* !KTR */ /* * g_sched_done() and g_sched_start() dispatch the geom requests to * the scheduling algorithm in use. */ static void g_sched_done(struct bio *bio) { struct g_geom *gp = bio->bio_caller2; struct g_sched_softc *sc = gp->softc; TRC_BIO_EVENT(DONE, bio); KASSERT(bio->bio_caller1, ("null bio_caller1 in g_sched_done")); g_sched_lock(gp); g_sched_update_stats(bio); sc->sc_gsched->gs_done(sc->sc_data, bio); if (!--sc->sc_pending) wakeup(gp); g_sched_flush_classes(gp); g_sched_unlock(gp); g_std_done(bio); } static void g_sched_start(struct bio *bp) { struct g_geom *gp = bp->bio_to->geom; struct g_sched_softc *sc = gp->softc; struct bio *cbp; TRC_BIO_EVENT(START, bp); G_SCHED_LOGREQ(bp, "Request received."); cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_sched_done; cbp->bio_to = LIST_FIRST(&gp->provider); KASSERT(cbp->bio_to != NULL, ("NULL provider")); /* We only schedule reads and writes. */ if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE) goto bypass; G_SCHED_LOGREQ(cbp, "Sending request."); g_sched_lock(gp); /* * Call the algorithm's gs_start to queue the request in the * scheduler. If gs_start fails then pass the request down, * otherwise call g_sched_dispatch() which tries to push * one or more requests down. */ if (!sc->sc_gsched || (sc->sc_flags & G_SCHED_FLUSHING) || sc->sc_gsched->gs_start(sc->sc_data, cbp)) { g_sched_unlock(gp); goto bypass; } /* * We use bio_caller1 to mark requests that are scheduled * so make sure it is not NULL. */ if (cbp->bio_caller1 == NULL) cbp->bio_caller1 = &me; /* anything not NULL */ cbp->bio_caller2 = gp; sc->sc_pending++; /* Update general stats. */ me.gs_in_flight++; me.gs_requests++; me.gs_bytes_in_flight += bp->bio_length; if (bp->bio_cmd == BIO_WRITE) { me.gs_writes_in_flight++; me.gs_write_bytes_in_flight += bp->bio_length; } g_sched_dispatch(gp); g_sched_unlock(gp); return; bypass: cbp->bio_done = g_std_done; cbp->bio_caller1 = NULL; /* not scheduled */ g_io_request(cbp, LIST_FIRST(&gp->consumer)); } /* * The next few functions are the geom glue. 
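For completeness, this is one plausible way an algorithm module would hook itself to g_gsched_modevent() above. The module name and the g_fifo_gsched descriptor are the hypothetical ones from the earlier sketch, while DECLARE_MODULE()/MODULE_DEPEND() are the stock FreeBSD module macros; the real gs_*.c files may wrap this differently.

/* Hypothetical module glue for the "fifo" sketch above. */
static moduledata_t g_fifo_mod = {
	"geom_sched_fifo",	/* module name */
	g_gsched_modevent,	/* shared event handler */
	&g_fifo_gsched,		/* handed to the handler as 'arg' */
};

DECLARE_MODULE(geom_sched_fifo, g_fifo_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(geom_sched_fifo, geom_sched, 0, 0, 0);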
*/ static void g_sched_orphan(struct g_consumer *cp) { g_topology_assert(); g_sched_destroy(cp->geom, 1); } static int g_sched_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; int error; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); error = g_access(cp, dr, dw, de); return (error); } static void g_sched_temporary_start(struct bio *bio) { mtx_lock(&me.gs_mtx); me.gs_npending++; bioq_disksort(&me.gs_pending, bio); mtx_unlock(&me.gs_mtx); } static void g_sched_flush_pending(g_start_t *start) { struct bio *bp; while ((bp = bioq_takefirst(&me.gs_pending))) start(bp); } static int g_insert_proxy(struct g_geom *gp, struct g_provider *newpp, struct g_geom *dstgp, struct g_provider *pp, struct g_consumer *cp) { struct g_sched_softc *sc = gp->softc; g_start_t *saved_start, *flush = g_sched_start; int error = 0, endticks = ticks + hz; g_cancel_event(newpp); /* prevent taste() */ /* copy private fields */ newpp->private = pp->private; newpp->index = pp->index; /* Queue all the early requests coming for us. */ me.gs_npending = 0; saved_start = pp->geom->start; dstgp->start = g_sched_temporary_start; while (pp->nstart - pp->nend != me.gs_npending && endticks - ticks >= 0) tsleep(pp, PRIBIO, "-", hz/10); if (pp->nstart - pp->nend != me.gs_npending) { flush = saved_start; error = ETIMEDOUT; goto fail; } /* link pp to this geom */ LIST_REMOVE(pp, provider); pp->geom = gp; LIST_INSERT_HEAD(&gp->provider, pp, provider); /* * replicate the counts from the parent in the * new provider and consumer nodes */ cp->acr = newpp->acr = pp->acr; cp->acw = newpp->acw = pp->acw; cp->ace = newpp->ace = pp->ace; sc->sc_flags |= G_SCHED_PROXYING; fail: dstgp->start = saved_start; g_sched_flush_pending(flush); return (error); } /* * Create a geom node for the device passed as *pp. * If successful, add a reference to this gsp. */ static int g_sched_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, struct g_gsched *gsp, int proxy) { struct g_sched_softc *sc = NULL; struct g_geom *gp, *dstgp; struct g_provider *newpp = NULL; struct g_consumer *cp = NULL; char name[64]; int error; g_topology_assert(); snprintf(name, sizeof(name), "%s%s", pp->name, G_SCHED_SUFFIX); LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) { gctl_error(req, "Geom %s already exists.", name); return (EEXIST); } } gp = g_new_geomf(mp, "%s", name); dstgp = proxy ? pp->geom : gp; /* where do we link the provider */ sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); sc->sc_gsched = gsp; sc->sc_data = gsp->gs_init(gp); if (sc->sc_data == NULL) { error = ENOMEM; goto fail; } sc->sc_hash = g_sched_hash_init(gsp, &sc->sc_mask, HASH_WAITOK); /* * Do not initialize the flush mechanism, will be initialized * on the first insertion on the hash table. */ mtx_init(&sc->sc_mtx, "g_sched_mtx", NULL, MTX_DEF); gp->softc = sc; gp->start = g_sched_start; gp->orphan = g_sched_orphan; gp->access = g_sched_access; gp->dumpconf = g_sched_dumpconf; newpp = g_new_providerf(dstgp, "%s", gp->name); newpp->mediasize = pp->mediasize; newpp->sectorsize = pp->sectorsize; cp = g_new_consumer(gp); error = g_attach(cp, proxy ? 
newpp : pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); goto fail; } g_error_provider(newpp, 0); if (proxy) { error = g_insert_proxy(gp, newpp, dstgp, pp, cp); if (error) goto fail; } G_SCHED_DEBUG(0, "Device %s created.", gp->name); g_gsched_ref(gsp); return (0); fail: if (cp != NULL) { if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); } if (newpp != NULL) g_destroy_provider(newpp); if (sc->sc_hash) g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data); if (sc->sc_data) gsp->gs_fini(sc->sc_data); g_free(gp->softc); g_destroy_geom(gp); return (error); } /* * Support for dynamic switching of scheduling algorithms. * First initialize the data structures for the new algorithm, * then call g_sched_remove_locked() to flush all references * to the old one, finally link the new algorithm. */ static int g_sched_change_algo(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, struct g_gsched *gsp) { struct g_sched_softc *sc; struct g_geom *gp; struct g_hash *newh; void *data; u_long mask; int error = 0; gp = pp->geom; sc = gp->softc; data = gsp->gs_init(gp); if (data == NULL) return (ENOMEM); newh = g_sched_hash_init(gsp, &mask, HASH_WAITOK); if (gsp->gs_priv_size && !newh) { error = ENOMEM; goto fail; } g_sched_lock(gp); if (sc->sc_gsched) { /* can be NULL in some cases */ error = g_sched_remove_locked(gp, sc->sc_gsched); if (error) goto fail; } g_gsched_ref(gsp); sc->sc_gsched = gsp; sc->sc_data = data; sc->sc_hash = newh; sc->sc_mask = mask; g_sched_unlock(gp); return (0); fail: if (newh) g_sched_hash_fini(gp, newh, mask, gsp, data); if (data) gsp->gs_fini(data); g_sched_unlock(gp); return (error); } /* * Stop the request flow directed to the proxy, redirecting the new * requests to the me.gs_pending queue. */ static struct g_provider * g_detach_proxy(struct g_geom *gp) { struct g_consumer *cp; struct g_provider *pp, *newpp; do { pp = LIST_FIRST(&gp->provider); if (pp == NULL) break; cp = LIST_FIRST(&gp->consumer); if (cp == NULL) break; newpp = cp->provider; if (newpp == NULL) break; me.gs_npending = 0; pp->geom->start = g_sched_temporary_start; return (pp); } while (0); printf("%s error detaching proxy %s\n", __FUNCTION__, gp->name); return (NULL); } static void g_sched_blackhole(struct bio *bp) { g_io_deliver(bp, ENXIO); } static inline void g_reparent_provider(struct g_provider *pp, struct g_geom *gp, struct g_provider *newpp) { LIST_REMOVE(pp, provider); if (newpp) { pp->private = newpp->private; pp->index = newpp->index; } pp->geom = gp; LIST_INSERT_HEAD(&gp->provider, pp, provider); } static inline void g_unproxy_provider(struct g_provider *oldpp, struct g_provider *newpp) { struct g_geom *gp = oldpp->geom; g_reparent_provider(oldpp, newpp->geom, newpp); /* * Hackish: let the system destroy the old provider for us, just * in case someone attached a consumer to it, in which case a * direct call to g_destroy_provider() would not work. */ g_reparent_provider(newpp, gp, NULL); } /* * Complete the proxy destruction, linking the old provider to its * original geom, and destroying the proxy provider. Also take care * of issuing the pending requests collected in me.gs_pending (if any). */ static int g_destroy_proxy(struct g_geom *gp, struct g_provider *oldpp) { struct g_consumer *cp; struct g_provider *newpp; do { cp = LIST_FIRST(&gp->consumer); if (cp == NULL) break; newpp = cp->provider; if (newpp == NULL) break; /* Relink the provider to its original geom. 
*/ g_unproxy_provider(oldpp, newpp); /* Detach consumer from provider, and destroy provider. */ cp->acr = newpp->acr = 0; cp->acw = newpp->acw = 0; cp->ace = newpp->ace = 0; g_detach(cp); /* Send the pending bios through the right start function. */ g_sched_flush_pending(oldpp->geom->start); return (0); } while (0); printf("%s error destroying proxy %s\n", __FUNCTION__, gp->name); /* We cannot send the pending bios anywhere... */ g_sched_flush_pending(g_sched_blackhole); return (EINVAL); } static int g_sched_destroy(struct g_geom *gp, boolean_t force) { struct g_provider *pp, *oldpp = NULL; struct g_sched_softc *sc; struct g_gsched *gsp; int error; g_topology_assert(); sc = gp->softc; if (sc == NULL) return (ENXIO); if (!(sc->sc_flags & G_SCHED_PROXYING)) { pp = LIST_FIRST(&gp->provider); if (pp && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { const char *msg = force ? "but we force removal" : "cannot remove"; G_SCHED_DEBUG(!force, "Device %s is still open (r%dw%de%d), %s.", pp->name, pp->acr, pp->acw, pp->ace, msg); if (!force) return (EBUSY); } else { G_SCHED_DEBUG(0, "Device %s removed.", gp->name); } } else oldpp = g_detach_proxy(gp); gsp = sc->sc_gsched; if (gsp) { /* * XXX bad hack here: force a dispatch to release * any reference to the hash table still held by * the scheduler. */ g_sched_lock(gp); /* * We are dying here, no new requests should enter * the scheduler. This is granted by the topolgy, * either in case we were proxying (new bios are * being redirected) or not (see the access check * above). */ g_sched_forced_dispatch(gp); error = g_sched_wait_pending(gp); if (error) { /* * Not all the requests came home: this might happen * under heavy load, or if we were waiting for any * bio which is served in the event path (see * geom_slice.c for an example of how this can * happen). Try to restore a working configuration * if we can fail. */ if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { g_sched_flush_pending(force ? g_sched_blackhole : g_sched_start); } /* * In the forced destroy case there is not so much * we can do, we have pending bios that will call * g_sched_done() somehow, and we don't want them * to crash the system using freed memory. We tell * the user that something went wrong, and leak some * memory here. * Note: the callers using force = 1 ignore the * return value. */ if (force) { G_SCHED_DEBUG(0, "Pending requests while " " destroying geom, some memory leaked."); } return (error); } g_sched_unlock(gp); g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data); sc->sc_hash = NULL; gsp->gs_fini(sc->sc_data); g_gsched_unref(gsp); sc->sc_gsched = NULL; } else error = 0; if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { error = g_destroy_proxy(gp, oldpp); if (error) { if (force) { G_SCHED_DEBUG(0, "Unrecoverable error while " "destroying a proxy geom, leaking some " " memory."); } return (error); } } mtx_destroy(&sc->sc_mtx); g_free(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (error); } static int g_sched_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_sched_destroy(gp, 0)); } /* * Functions related to the classification of requests. * * On recent FreeBSD versions (8.0 and above), we store a reference * to the issuer of a request in bp->bio_classifier1 as soon * as the bio is posted to the geom queue (and not later, because * requests are managed by the g_down thread afterwards). 
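The hook registered below tags each bio with curthread, so every kernel thread becomes its own flow. The same interface could carry a coarser policy; as an illustration, a hypothetical per-process classifier would differ only in what it stores in bio_classifier1 (g_register_classifier() and struct g_classifier_hook are taken from the code below, the rest is an assumption).

/* Hypothetical alternative: classify by issuing process, not thread. */
static int
g_sched_tag_by_proc(void *arg __unused, struct bio *bp)
{

	bp->bio_classifier1 = curthread->td_proc;
	return (1);
}

static struct g_classifier_hook g_sched_proc_classifier = {
	.func = g_sched_tag_by_proc,
};

/* Registered and unregistered exactly like the default hook below. */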
*/ /* * Classifier support for recent FreeBSD versions: we use * a very simple classifier, only use curthread to tag a request. * The classifier is registered at module load, and unregistered * at module unload. */ static int g_sched_tag(void *arg, struct bio *bp) { bp->bio_classifier1 = curthread; return (1); } static struct g_classifier_hook g_sched_classifier = { .func = g_sched_tag, }; static inline void g_classifier_ini(void) { g_register_classifier(&g_sched_classifier); } static inline void g_classifier_fini(void) { g_unregister_classifier(&g_sched_classifier); } static void g_sched_init(struct g_class *mp) { g_gsched_global_init(); G_SCHED_DEBUG(0, "Loading: mp = %p, g_sched_class = %p.", mp, &g_sched_class); /* Patch g_io_request to store classification info in the bio. */ g_classifier_ini(); } static void g_sched_fini(struct g_class *mp) { g_classifier_fini(); G_SCHED_DEBUG(0, "Unloading..."); KASSERT(LIST_EMPTY(&me.gs_scheds), ("still registered schedulers")); mtx_destroy(&me.gs_mtx); } static int g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { struct g_consumer *cp; struct g_geom *gp; cp = LIST_FIRST(&pp->geom->consumer); if (cp == NULL) return (ENOIOCTL); gp = cp->provider->geom; if (gp->ioctl == NULL) return (ENOIOCTL); return (gp->ioctl(cp->provider, cmd, data, fflag, td)); } /* * Read the i-th argument for a request, skipping the /dev/ * prefix if present. */ static const char * g_sched_argi(struct gctl_req *req, int i) { static const char *dev_prefix = "/dev/"; const char *name; char param[16]; int l = strlen(dev_prefix); snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) gctl_error(req, "No 'arg%d' argument", i); else if (strncmp(name, dev_prefix, l) == 0) name += l; return (name); } /* * Fetch nargs and do appropriate checks. */ static int g_sched_get_nargs(struct gctl_req *req) { int *nargs; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No 'nargs' argument"); return (0); } if (*nargs <= 0) gctl_error(req, "Missing device(s)."); return (*nargs); } /* * Check whether we should add the class on certain volumes when * this geom is created. Right now this is under control of a kenv * variable containing the names of all devices that we care about. * Probably we should only support transparent insertion as the * preferred mode of operation. */ static struct g_geom * g_sched_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_gsched *gsp = NULL; /* the . algorithm we want */ const char *s; /* generic string pointer */ const char *taste_names; /* devices we like */ int l; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); G_SCHED_DEBUG(2, "Tasting %s.", pp->name); do { /* do not taste on ourselves */ if (pp->geom->class == mp) break; taste_names = kern_getenv("geom.sched.taste"); if (taste_names == NULL) break; l = strlen(pp->name); for (s = taste_names; *s && (s = strstr(s, pp->name)); s++) { /* further checks for an exact match */ if ( (s == taste_names || s[-1] == ' ') && (s[l] == '\0' || s[l] == ' ') ) break; } if (s == NULL) break; G_SCHED_DEBUG(0, "Attach device %s match [%s]\n", pp->name, s); /* look up the provider name in the list */ s = kern_getenv("geom.sched.algo"); if (s == NULL) s = "rr"; gsp = g_gsched_find(s); /* also get a reference */ if (gsp == NULL) { G_SCHED_DEBUG(0, "Bad '%s' algorithm.", s); break; } /* XXX create with 1 as last argument ? 
*/ g_sched_create(NULL, mp, pp, gsp, 0); g_gsched_unref(gsp); } while (0); return NULL; } static void g_sched_ctl_create(struct gctl_req *req, struct g_class *mp, int proxy) { struct g_provider *pp; struct g_gsched *gsp; const char *name; int i, nargs; g_topology_assert(); name = gctl_get_asciiparam(req, "algo"); if (name == NULL) { gctl_error(req, "No '%s' argument", "algo"); return; } gsp = g_gsched_find(name); /* also get a reference */ if (gsp == NULL) { gctl_error(req, "Bad algorithm '%s'", name); return; } nargs = g_sched_get_nargs(req); /* * Run on the arguments, and break on any error. * We look for a device name, but skip the /dev/ prefix if any. */ for (i = 0; i < nargs; i++) { name = g_sched_argi(req, i); if (name == NULL) break; pp = g_provider_by_name(name); if (pp == NULL) { G_SCHED_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); break; } if (g_sched_create(req, mp, pp, gsp, proxy) != 0) break; } g_gsched_unref(gsp); } static void g_sched_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; struct g_gsched *gsp; const char *name; int i, nargs; g_topology_assert(); name = gctl_get_asciiparam(req, "algo"); if (name == NULL) { gctl_error(req, "No '%s' argument", "algo"); return; } gsp = g_gsched_find(name); /* also get a reference */ if (gsp == NULL) { gctl_error(req, "Bad algorithm '%s'", name); return; } nargs = g_sched_get_nargs(req); /* * Run on the arguments, and break on any error. * We look for a device name, but skip the /dev/ prefix if any. */ for (i = 0; i < nargs; i++) { name = g_sched_argi(req, i); if (name == NULL) break; pp = g_provider_by_name(name); if (pp == NULL || pp->geom->class != mp) { G_SCHED_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); break; } if (g_sched_change_algo(req, mp, pp, gsp) != 0) break; } g_gsched_unref(gsp); } static struct g_geom * g_sched_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp); } return (NULL); } static void g_sched_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int nargs, *force, error, i; struct g_geom *gp; const char *name; g_topology_assert(); nargs = g_sched_get_nargs(req); force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < nargs; i++) { name = g_sched_argi(req, i); if (name == NULL) break; gp = g_sched_find_geom(mp, name); if (gp == NULL) { G_SCHED_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); break; } error = g_sched_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", gp->name, error); break; } } } static void g_sched_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_SCHED_VERSION) { gctl_error(req, "Userland and kernel parts are " "out of sync."); return; } if (strcmp(verb, "create") == 0) { g_sched_ctl_create(req, mp, 0); return; } else if (strcmp(verb, "insert") == 0) { g_sched_ctl_create(req, mp, 1); return; } else if (strcmp(verb, "configure") == 0) { g_sched_ctl_configure(req, mp); return; } else if (strcmp(verb, "destroy") == 0) { g_sched_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown 
verb."); } static void g_sched_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_sched_softc *sc = gp->softc; struct g_gsched *gsp = sc->sc_gsched; if (indent == NULL) { /* plaintext */ sbuf_printf(sb, " algo %s", gsp ? gsp->gs_name : "--"); } if (gsp != NULL && gsp->gs_dumpconf) gsp->gs_dumpconf(sb, indent, gp, cp, pp); } DECLARE_GEOM_CLASS(g_sched_class, g_sched); MODULE_VERSION(geom_sched, 0); Index: head/sys/geom/sched/g_sched.h =================================================================== --- head/sys/geom/sched/g_sched.h (revision 350693) +++ head/sys/geom/sched/g_sched.h (revision 350694) @@ -1,127 +1,111 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2010 Fabio Checconi * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _G_SCHED_H_ #define _G_SCHED_H_ /* * $Id$ * $FreeBSD$ * * Header for the geom_sched class (userland library and kernel part). * See g_sched.c for documentation. * The userland code only needs the three G_SCHED_* values below. */ #define G_SCHED_CLASS_NAME "SCHED" #define G_SCHED_VERSION 0 #define G_SCHED_SUFFIX ".sched." #ifdef _KERNEL -#define G_SCHED_DEBUG(lvl, ...) do { \ - if (me.gs_debug >= (lvl)) { \ - printf("GEOM_SCHED"); \ - if (me.gs_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) - -#define G_SCHED_LOGREQ(bp, ...) do { \ - if (me.gs_debug >= 2) { \ - printf("GEOM_SCHED[2]: "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_SCHED_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_SCHED", me.gs_debug, (lvl), NULL, __VA_ARGS__) +#define G_SCHED_LOGREQ(bp, ...) \ + _GEOM_DEBUG("GEOM_SCHED", me.gs_debug, 2, (bp), __VA_ARGS__) LIST_HEAD(g_hash, g_sched_class); /* * Descriptor of a scheduler. * In addition to the obvious fields, sc_flushing and sc_pending * support dynamic switching of scheduling algorithm. * Normally, sc_flushing is 0, and requests that are scheduled are * also added to the sc_pending queue, and removed when we receive * the 'done' event. * * When we are transparently inserted on an existing provider, * sc_proxying is set. 
The detach procedure is slightly different. * * When switching schedulers, sc_flushing is set so requests bypass us, * and at the same time we update the pointer in the pending bios * to ignore us when they return up. * XXX it would be more efficient to implement sc_pending with * a generation number: the softc generation is increased when * we change scheduling algorithm, we store the current generation * number in the pending bios, and when they come back we ignore * the done() call if the generation number do not match. */ struct g_sched_softc { /* * Generic fields used by any scheduling algorithm: * a mutex, the class descriptor, flags, list of pending * requests (used when flushing the module) and support * for hash tables where we store per-flow queues. */ struct mtx sc_mtx; struct g_gsched *sc_gsched; /* Scheduler descriptor. */ int sc_pending; /* Pending requests. */ int sc_flags; /* Various flags. */ /* * Hash tables to store per-flow queues are generally useful * so we handle them in the common code. * sc_hash and sc_mask are parameters of the hash table, * the last two fields are used to periodically remove * expired items from the hash table. */ struct g_hash *sc_hash; u_long sc_mask; int sc_flush_ticks; /* Next tick for a flush. */ int sc_flush_bucket; /* Next bucket to flush. */ /* * Pointer to the algorithm's private data, which is the value * returned by sc_gsched->gs_init() . A NULL here means failure. * XXX intptr_t might be more appropriate. */ void *sc_data; }; #define G_SCHED_PROXYING 1 #define G_SCHED_FLUSHING 2 #endif /* _KERNEL */ #endif /* _G_SCHED_H_ */ Index: head/sys/geom/shsec/g_shsec.c =================================================================== --- head/sys/geom/shsec/g_shsec.c (revision 350693) +++ head/sys/geom/shsec/g_shsec.c (revision 350694) @@ -1,839 +1,840 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(geom_shsec, "GEOM shared secret device support"); static MALLOC_DEFINE(M_SHSEC, "shsec_data", "GEOM_SHSEC Data"); static uma_zone_t g_shsec_zone; static int g_shsec_destroy(struct g_shsec_softc *sc, boolean_t force); static int g_shsec_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_shsec_taste; static g_ctl_req_t g_shsec_config; static g_dumpconf_t g_shsec_dumpconf; static g_init_t g_shsec_init; static g_fini_t g_shsec_fini; struct g_class g_shsec_class = { .name = G_SHSEC_CLASS_NAME, .version = G_VERSION, .ctlreq = g_shsec_config, .taste = g_shsec_taste, .destroy_geom = g_shsec_destroy_geom, .init = g_shsec_init, .fini = g_shsec_fini }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, shsec, CTLFLAG_RW, 0, "GEOM_SHSEC stuff"); static u_int g_shsec_debug = 0; SYSCTL_UINT(_kern_geom_shsec, OID_AUTO, debug, CTLFLAG_RWTUN, &g_shsec_debug, 0, "Debug level"); static u_int g_shsec_maxmem = MAXPHYS * 100; SYSCTL_UINT(_kern_geom_shsec, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_shsec_maxmem, 0, "Maximum memory that can be allocated for I/O (in bytes)"); static u_int g_shsec_alloc_failed = 0; SYSCTL_UINT(_kern_geom_shsec, OID_AUTO, alloc_failed, CTLFLAG_RD, &g_shsec_alloc_failed, 0, "How many times I/O allocation failed"); /* * Greatest Common Divisor. */ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } static void g_shsec_init(struct g_class *mp __unused) { g_shsec_zone = uma_zcreate("g_shsec_zone", MAXPHYS, NULL, NULL, NULL, NULL, 0, 0); g_shsec_maxmem -= g_shsec_maxmem % MAXPHYS; uma_zone_set_max(g_shsec_zone, g_shsec_maxmem / MAXPHYS); } static void g_shsec_fini(struct g_class *mp __unused) { uma_zdestroy(g_shsec_zone); } /* * Return the number of valid disks. */ static u_int g_shsec_nvalid(struct g_shsec_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i] != NULL) no++; } return (no); } static void g_shsec_remove_disk(struct g_consumer *cp) { struct g_shsec_softc *sc; u_int no; KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); sc = (struct g_shsec_softc *)cp->private; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); no = cp->index; G_SHSEC_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); sc->sc_disks[no] = NULL; if (sc->sc_provider != NULL) { g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; G_SHSEC_DEBUG(0, "Device %s removed.", sc->sc_name); } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); g_destroy_consumer(cp); } static void g_shsec_orphan(struct g_consumer *cp) { struct g_shsec_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; g_shsec_remove_disk(cp); /* If there are no valid disks anymore, remove device. */ if (g_shsec_nvalid(sc) == 0) g_shsec_destroy(sc, 1); } static int g_shsec_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2; struct g_shsec_softc *sc; struct g_geom *gp; int error; gp = pp->geom; sc = gp->softc; if (sc == NULL) { /* * It looks like geom is being withered. * In that case we allow only negative requests. 
*/ KASSERT(dr <= 0 && dw <= 0 && de <= 0, ("Positive access request (device=%s).", pp->name)); if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) { G_SHSEC_DEBUG(0, "Device %s definitely destroyed.", gp->name); } return (0); } /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; error = ENXIO; LIST_FOREACH(cp1, &gp->consumer, consumer) { error = g_access(cp1, dr, dw, de); if (error == 0) continue; /* * If we fail here, backout all previous changes. */ LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) return (error); g_access(cp2, -dr, -dw, -de); } /* NOTREACHED */ } return (error); } static void g_shsec_xor1(uint32_t *src, uint32_t *dst, ssize_t len) { for (; len > 0; len -= sizeof(uint32_t), dst++) *dst = *dst ^ *src++; KASSERT(len == 0, ("len != 0 (len=%zd)", len)); } static void g_shsec_done(struct bio *bp) { struct g_shsec_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; if (bp->bio_error == 0) G_SHSEC_LOGREQ(2, bp, "Request done."); else { G_SHSEC_LOGREQ(0, bp, "Request failed (error=%d).", bp->bio_error); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; } if (pbp->bio_cmd == BIO_READ) { if ((pbp->bio_pflags & G_SHSEC_BFLAG_FIRST) != 0) { bcopy(bp->bio_data, pbp->bio_data, pbp->bio_length); pbp->bio_pflags = 0; } else { g_shsec_xor1((uint32_t *)bp->bio_data, (uint32_t *)pbp->bio_data, (ssize_t)pbp->bio_length); } } bzero(bp->bio_data, bp->bio_length); uma_zfree(g_shsec_zone, bp->bio_data); g_destroy_bio(bp); pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { pbp->bio_completed = pbp->bio_length; g_io_deliver(pbp, pbp->bio_error); } } static void g_shsec_xor2(uint32_t *rand, uint32_t *dst, ssize_t len) { for (; len > 0; len -= sizeof(uint32_t), dst++) { *rand = arc4random(); *dst = *dst ^ *rand++; } KASSERT(len == 0, ("len != 0 (len=%zd)", len)); } static void g_shsec_start(struct bio *bp) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_shsec_softc *sc; struct bio *cbp; uint32_t *dst; ssize_t len; u_int no; int error; sc = bp->bio_to->geom->softc; /* * If sc == NULL, provider's error should be set and g_shsec_start() * should not be called at all. */ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_SHSEC_LOGREQ(2, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_FLUSH: /* * Only those requests are supported. */ break; case BIO_DELETE: case BIO_GETATTR: /* To which provider it should be delivered? */ default: g_io_deliver(bp, EOPNOTSUPP); return; } /* * Allocate all bios first and calculate XOR. */ dst = NULL; len = bp->bio_length; if (bp->bio_cmd == BIO_READ) bp->bio_pflags = G_SHSEC_BFLAG_FIRST; for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ cbp->bio_done = g_shsec_done; cbp->bio_data = uma_zalloc(g_shsec_zone, M_NOWAIT); if (cbp->bio_data == NULL) { g_shsec_alloc_failed++; error = ENOMEM; goto failure; } cbp->bio_caller2 = sc->sc_disks[no]; if (bp->bio_cmd == BIO_WRITE) { if (no == 0) { dst = (uint32_t *)cbp->bio_data; bcopy(bp->bio_data, dst, len); } else { g_shsec_xor2((uint32_t *)cbp->bio_data, dst, len); } } } /* * Fire off all allocated requests! 
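The write path above is a plain XOR secret-sharing scheme: component 0 starts out holding the plaintext, every other component gets freshly generated random words, and g_shsec_xor2() folds each random share into component 0 as it goes, so no single disk ever stores recoverable data. On read, g_shsec_done() copies whichever share completes first and XORs the remaining ones in via g_shsec_xor1(). A small standalone userland illustration of the same invariant for two shares (ordinary C, not kernel code; arc4random() as provided on FreeBSD):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	uint32_t plain[4] = { 0xdeadbeef, 0x00c0ffee, 0x12345678, 0x9abcdef0 };
	uint32_t share0[4], share1[4], out[4];
	int i;

	/* Write side: share0 = plaintext, share1 = random, share0 ^= share1. */
	memcpy(share0, plain, sizeof(plain));
	for (i = 0; i < 4; i++) {
		share1[i] = arc4random();
		share0[i] ^= share1[i];
	}

	/* Read side: copy the first share that arrives, XOR in the rest. */
	memcpy(out, share0, sizeof(out));
	for (i = 0; i < 4; i++)
		out[i] ^= share1[i];

	assert(memcmp(out, plain, sizeof(plain)) == 0);
	return (0);
}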
*/ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; G_SHSEC_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); } return; failure: while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); bp->bio_children--; if (cbp->bio_data != NULL) { bzero(cbp->bio_data, cbp->bio_length); uma_zfree(g_shsec_zone, cbp->bio_data); } g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = error; g_io_deliver(bp, bp->bio_error); } static void g_shsec_check_and_run(struct g_shsec_softc *sc) { off_t mediasize, ms; u_int no, sectorsize = 0; if (g_shsec_nvalid(sc) != sc->sc_ndisks) return; sc->sc_provider = g_new_providerf(sc->sc_geom, "shsec/%s", sc->sc_name); /* * Find the smallest disk. */ mediasize = sc->sc_disks[0]->provider->mediasize; mediasize -= sc->sc_disks[0]->provider->sectorsize; sectorsize = sc->sc_disks[0]->provider->sectorsize; for (no = 1; no < sc->sc_ndisks; no++) { ms = sc->sc_disks[no]->provider->mediasize; ms -= sc->sc_disks[no]->provider->sectorsize; if (ms < mediasize) mediasize = ms; sectorsize = lcm(sectorsize, sc->sc_disks[no]->provider->sectorsize); } sc->sc_provider->sectorsize = sectorsize; sc->sc_provider->mediasize = mediasize; g_error_provider(sc->sc_provider, 0); G_SHSEC_DEBUG(0, "Device %s activated.", sc->sc_name); } static int g_shsec_read_metadata(struct g_consumer *cp, struct g_shsec_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ shsec_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_shsec_add_disk(struct g_shsec_softc *sc, struct g_provider *pp, u_int no) { struct g_consumer *cp, *fcp; struct g_geom *gp; struct g_shsec_metadata md; int error; /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); /* Check if disk is not already attached. */ if (sc->sc_disks[no] != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } /* Reread metadata. */ error = g_shsec_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_SHSEC_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_SHSEC_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } cp->private = sc; cp->index = no; sc->sc_disks[no] = cp; G_SHSEC_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_shsec_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_shsec_create(struct g_class *mp, const struct g_shsec_metadata *md) { struct g_shsec_softc *sc; struct g_geom *gp; u_int no; G_SHSEC_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* Two disks is minimum. 
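g_shsec_check_and_run() above derives the shared provider's geometry from its components: the usable media size is that of the smallest component minus one sector (the last sector of every component is reserved for the metadata), and the sector size is the least common multiple of the component sector sizes, so mixing a 512-byte-sector disk with a 4096-byte-sector disk yields a 4096-byte provider sector. A tiny illustration using the gcd()/lcm() helpers defined earlier in this file (the component sizes are made up):

/* Hypothetical components: 1 GB with 512 B sectors and 1 GB with 4 kB sectors. */
static void
g_shsec_example_geometry(void)
{
	off_t mediasize;
	u_int sectorsize;

	mediasize = MIN((off_t)1073741824 - 512, (off_t)1073741824 - 4096);
	sectorsize = lcm(512, 4096);	/* gcd() is 512, so lcm() is 4096 */

	printf("shsec provider: %jd bytes, %u byte sectors\n",
	    (intmax_t)mediasize, sectorsize);
}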
*/ if (md->md_all < 2) { G_SHSEC_DEBUG(0, "Too few disks defined for %s.", md->md_name); return (NULL); } /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_SHSEC_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_SHSEC, M_WAITOK | M_ZERO); gp->start = g_shsec_start; gp->spoiled = g_shsec_orphan; gp->orphan = g_shsec_orphan; gp->access = g_shsec_access; gp->dumpconf = g_shsec_dumpconf; sc->sc_id = md->md_id; sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, M_SHSEC, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no] = NULL; gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_SHSEC_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_shsec_destroy(struct g_shsec_softc *sc, boolean_t force) { struct g_provider *pp; struct g_geom *gp; u_int no; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_SHSEC_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_SHSEC_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } for (no = 0; no < sc->sc_ndisks; no++) { if (sc->sc_disks[no] != NULL) g_shsec_remove_disk(sc->sc_disks[no]); } gp = sc->sc_geom; gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", gp->name)); free(sc->sc_disks, M_SHSEC); free(sc, M_SHSEC); pp = LIST_FIRST(&gp->provider); if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) G_SHSEC_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_shsec_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_shsec_softc *sc; sc = gp->softc; return (g_shsec_destroy(sc, 0)); } static struct g_geom * g_shsec_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_shsec_metadata md; struct g_shsec_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_SHSEC_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "shsec:taste"); gp->start = g_shsec_start; gp->access = g_shsec_access; gp->orphan = g_shsec_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_shsec_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_SHSEC_MAGIC) != 0) return (NULL); if (md.md_version > G_SHSEC_VERSION) { G_SHSEC_DEBUG(0, "Kernel module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provsize field in earlier versions of metadata. */ if (md.md_version < 1) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. 
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_SHSEC_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_shsec_add_disk(sc, pp, md.md_no); if (error != 0) { G_SHSEC_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_shsec_create(mp, &md); if (gp == NULL) { G_SHSEC_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_SHSEC_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_shsec_add_disk(sc, pp, md.md_no); if (error != 0) { G_SHSEC_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_shsec_destroy(sc, 1); return (NULL); } } return (gp); } static struct g_shsec_softc * g_shsec_find_device(struct g_class *mp, const char *name) { struct g_shsec_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) == 0) return (sc); } return (NULL); } static void g_shsec_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_shsec_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_shsec_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_shsec_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_shsec_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_SHSEC_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "stop") == 0) { g_shsec_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_shsec_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_shsec_softc *sc; sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { sbuf_printf(sb, "%s%u\n", indent, (u_int)cp->index); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_shsec_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_printf(sb, "UP"); else sbuf_printf(sb, "DOWN"); sbuf_printf(sb, "\n"); } } DECLARE_GEOM_CLASS(g_shsec_class, g_shsec); MODULE_VERSION(geom_shsec, 0); Index: head/sys/geom/shsec/g_shsec.h =================================================================== --- head/sys/geom/shsec/g_shsec.h (revision 350693) +++ head/sys/geom/shsec/g_shsec.h (revision 350694) @@ -1,119 +1,101 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_SHSEC_H_ #define _G_SHSEC_H_ #include #define G_SHSEC_CLASS_NAME "SHSEC" #define G_SHSEC_MAGIC "GEOM::SHSEC" /* * Version history: * 0 - Initial version number. * 1 - Added md_provsize field to metadata. */ #define G_SHSEC_VERSION 1 #ifdef _KERNEL #define G_SHSEC_BFLAG_FIRST 0x1 -#define G_SHSEC_DEBUG(lvl, ...) do { \ - if (g_shsec_debug >= (lvl)) { \ - printf("GEOM_SHSEC"); \ - if (g_shsec_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_SHSEC_LOGREQ(lvl, bp, ...) do { \ - if (g_shsec_debug >= (lvl)) { \ - printf("GEOM_SHSEC"); \ - if (g_shsec_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_SHSEC_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_SHSEC", g_shsec_debug, (lvl), NULL, __VA_ARGS__) +#define G_SHSEC_LOGREQ(lvl, bp, ...) \ + _GEOM_DEBUG("GEOM_SHSEC", g_shsec_debug, (lvl), (bp), __VA_ARGS__) struct g_shsec_softc { u_int sc_type; /* provider type */ struct g_geom *sc_geom; struct g_provider *sc_provider; uint32_t sc_id; /* device unique ID */ struct g_consumer **sc_disks; uint16_t sc_ndisks; }; #define sc_name sc_geom->name #endif /* _KERNEL */ struct g_shsec_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ char md_name[16]; /* Stripe name. */ uint32_t md_id; /* Unique ID. 
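 * The id is normally a random value chosen when the device is
 * labelled; the taste path matches on name and id, so two devices
 * that happen to share a name are never mixed.
 *
 * Note on the hunk above: G_SHSEC_DEBUG and G_SHSEC_LOGREQ now simply
 * delegate to the shared _GEOM_DEBUG helper pulled in by the added
 * include, passing the "GEOM_SHSEC" prefix, the class debug sysctl,
 * the level and an optional bio.  The helper itself is not part of
 * this hunk; as a rough, assumed sketch it is expected to do what the
 * removed open-coded macros did, i.e. something like:
 *
 *	if (g_shsec_debug >= (lvl)) {
 *		printf("GEOM_SHSEC");
 *		if (g_shsec_debug > 0)
 *			printf("[%u]", (lvl));
 *		printf(": ");
 *		printf(__VA_ARGS__);
 *		if (bp != NULL) {
 *			printf(" ");
 *			g_print_bio(bp);
 *		}
 *		printf("\n");
 *	}
 *
 * so the change is meant to centralize, not alter, the debug output.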
*/ uint16_t md_no; /* Disk number. */ uint16_t md_all; /* Number of all disks. */ char md_provider[16]; /* Hardcoded provider. */ uint64_t md_provsize; /* Provider's size. */ }; static __inline void shsec_metadata_encode(const struct g_shsec_metadata *md, u_char *data) { bcopy(md->md_magic, data, sizeof(md->md_magic)); le32enc(data + 16, md->md_version); bcopy(md->md_name, data + 20, sizeof(md->md_name)); le32enc(data + 36, md->md_id); le16enc(data + 40, md->md_no); le16enc(data + 42, md->md_all); bcopy(md->md_provider, data + 44, sizeof(md->md_provider)); le64enc(data + 60, md->md_provsize); } static __inline void shsec_metadata_decode(const u_char *data, struct g_shsec_metadata *md) { bcopy(data, md->md_magic, sizeof(md->md_magic)); md->md_version = le32dec(data + 16); bcopy(data + 20, md->md_name, sizeof(md->md_name)); md->md_id = le32dec(data + 36); md->md_no = le16dec(data + 40); md->md_all = le16dec(data + 42); bcopy(data + 44, md->md_provider, sizeof(md->md_provider)); md->md_provsize = le64dec(data + 60); } #endif /* _G_SHSEC_H_ */ Index: head/sys/geom/stripe/g_stripe.c =================================================================== --- head/sys/geom/stripe/g_stripe.c (revision 350693) +++ head/sys/geom/stripe/g_stripe.c (revision 350694) @@ -1,1274 +1,1275 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(geom_stripe, "GEOM striping support"); static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); static uma_zone_t g_stripe_zone; static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_stripe_taste; static g_ctl_req_t g_stripe_config; static g_dumpconf_t g_stripe_dumpconf; static g_init_t g_stripe_init; static g_fini_t g_stripe_fini; struct g_class g_stripe_class = { .name = G_STRIPE_CLASS_NAME, .version = G_VERSION, .ctlreq = g_stripe_config, .taste = g_stripe_taste, .destroy_geom = g_stripe_destroy_geom, .init = g_stripe_init, .fini = g_stripe_fini }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); static u_int g_stripe_debug = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, "Debug level"); static int g_stripe_fast = 0; static int g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) { int error, fast; fast = g_stripe_fast; error = sysctl_handle_int(oidp, &fast, 0, req); if (error == 0 && req->newptr != NULL) g_stripe_fast = fast; return (error); } SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RWTUN, NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); static u_int g_stripe_maxmem = MAXPHYS * 100; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem, 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); static u_int g_stripe_fast_failed = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); /* * Greatest Common Divisor. */ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } static void g_stripe_init(struct g_class *mp __unused) { g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, NULL, NULL, 0, 0); g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); } static void g_stripe_fini(struct g_class *mp __unused) { uma_zdestroy(g_stripe_zone); } /* * Return the number of valid disks. */ static u_int g_stripe_nvalid(struct g_stripe_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i] != NULL) no++; } return (no); } static void g_stripe_remove_disk(struct g_consumer *cp) { struct g_stripe_softc *sc; g_topology_assert(); KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); sc = (struct g_stripe_softc *)cp->geom->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); if (cp->private == NULL) { G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); cp->private = (void *)(uintptr_t)-1; } if (sc->sc_provider != NULL) { G_STRIPE_DEBUG(0, "Device %s deactivated.", sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) return; sc->sc_disks[cp->index] = NULL; cp->index = 0; g_detach(cp); g_destroy_consumer(cp); /* If there are no valid disks anymore, remove device. 
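 * Once the last consumer is gone the geom cannot carry any I/O, so the
 * whole softc is destroyed (force set); the provider itself has already
 * been withered above when the first disk went away.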
*/ if (LIST_EMPTY(&sc->sc_geom->consumer)) g_stripe_destroy(sc, 1); } static void g_stripe_orphan(struct g_consumer *cp) { struct g_stripe_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; g_stripe_remove_disk(cp); } static int g_stripe_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2, *tmp; struct g_stripe_softc *sc; struct g_geom *gp; int error; g_topology_assert(); gp = pp->geom; sc = gp->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { error = g_access(cp1, dr, dw, de); if (error != 0) goto fail; if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && cp1->private != NULL) { g_stripe_remove_disk(cp1); /* May destroy geom. */ } } return (0); fail: LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } return (error); } static void g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, off_t length, int mode) { off_t stripesize; size_t len; stripesize = sc->sc_stripesize; len = (size_t)(stripesize - (offset & (stripesize - 1))); do { bcopy(src, dst, len); if (mode) { dst += len + stripesize * (sc->sc_ndisks - 1); src += len; } else { dst += len; src += len + stripesize * (sc->sc_ndisks - 1); } length -= len; KASSERT(length >= 0, ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); if (length > stripesize) len = stripesize; else len = length; } while (length > 0); } static void g_stripe_done(struct bio *bp) { struct g_stripe_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, bp->bio_length, 1); bp->bio_data = bp->bio_caller1; bp->bio_caller1 = NULL; } mtx_lock(&sc->sc_lock); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += bp->bio_completed; pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { mtx_unlock(&sc->sc_lock); if (pbp->bio_driver1 != NULL) uma_zfree(g_stripe_zone, pbp->bio_driver1); g_io_deliver(pbp, pbp->bio_error); } else mtx_unlock(&sc->sc_lock); g_destroy_bio(bp); } static int g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_stripe_softc *sc; char *addr, *data = NULL; struct bio *cbp; off_t stripesize; u_int nparts = 0; int error; sc = bp->bio_to->geom->softc; addr = bp->bio_data; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. 
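 * "Fast" mode issues at most one clone per disk.  A clone that ends up
 * covering more than one chunk cannot point straight into the caller's
 * buffer, so its data is bounced through a single MAXPHYS-sized
 * allocation from g_stripe_zone that is carved into one compact region
 * per such clone: g_stripe_copy() with mode 0 gathers the caller's
 * scattered chunks into that region before a write is sent down, and
 * with mode 1 spreads a completed read back out in g_stripe_done().
 * Chunks that belong to the same disk sit (ndisks - 1) stripes of other
 * disks' data apart in the caller's buffer, which is exactly the stride
 * g_stripe_copy() walks.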
*/ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; cbp->bio_length = length; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } if (nparts >= sc->sc_ndisks) { cbp = TAILQ_NEXT(cbp, bio_queue); if (cbp == NULL) cbp = TAILQ_FIRST(&queue); nparts++; /* * Update bio structure. */ /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length += MIN(stripesize, length); if (cbp->bio_caller1 == NULL) { cbp->bio_caller1 = cbp->bio_data; cbp->bio_data = NULL; if (data == NULL) { data = uma_zalloc(g_stripe_zone, M_NOWAIT); if (data == NULL) { error = ENOMEM; goto failure; } } } } else { cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); cbp->bio_caller2 = sc->sc_disks[no]; } } if (data != NULL) bp->bio_driver1 = data; /* * Fire off all allocated requests! */ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; if (cbp->bio_caller1 != NULL) { cbp->bio_data = data; if (bp->bio_cmd == BIO_WRITE) { g_stripe_copy(sc, cbp->bio_caller1, data, cbp->bio_offset, cbp->bio_length, 0); } data += cbp->bio_length; } G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: if (data != NULL) uma_zfree(g_stripe_zone, data); while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); if (cbp->bio_caller1 != NULL) { cbp->bio_data = cbp->bio_caller1; cbp->bio_caller1 = NULL; } bp->bio_children--; g_destroy_bio(cbp); } return (error); } static int g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_stripe_softc *sc; off_t stripesize; struct bio *cbp; char *addr; int error; sc = bp->bio_to->geom->softc; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ if (bp->bio_length == length) cbp->bio_done = g_std_done; /* Optimized lockless case. */ else cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; addr = NULL; } else addr = bp->bio_data; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); if (bp->bio_cmd != BIO_DELETE) addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. 
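 * "Economic" mode, by contrast, never copies data: one clone is issued
 * per chunk touched by the request, each pointing straight into the
 * caller's buffer (or, for unmapped bios, at the proper offset into the
 * caller's page list).  A request that fits entirely inside a single
 * chunk completes through g_std_done and so never takes sc_lock;
 * everything else funnels through g_stripe_done() as usual.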
*/ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); if ((bp->bio_flags & BIO_UNMAPPED) != 0) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller2 = sc->sc_disks[no]; if (bp->bio_cmd != BIO_DELETE) addr += stripesize; } /* * Fire off all allocated requests! */ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); bp->bio_children--; g_destroy_bio(cbp); } return (error); } static void g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_consumer *cp; struct bio *cbp; u_int no; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_stripe_done; cbp->bio_caller2 = sc->sc_disks[no]; cbp->bio_to = sc->sc_disks[no]->provider; } for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); G_STRIPE_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; g_io_request(cbp, cp); } } static void g_stripe_start(struct bio *bp) { off_t offset, start, length, nstripe, stripesize; struct g_stripe_softc *sc; u_int no; int error, fast = 0; sc = bp->bio_to->geom->softc; /* * If sc == NULL, provider's error should be set and g_stripe_start() * should not be called at all. */ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_STRIPE_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_FLUSH: g_stripe_flush(sc, bp); return; case BIO_GETATTR: /* To which provider it should be delivered? */ default: g_io_deliver(bp, EOPNOTSUPP); return; } stripesize = sc->sc_stripesize; /* * Calculations are quite messy, but fast I hope. */ /* Stripe number. */ /* nstripe = bp->bio_offset / stripesize; */ nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; /* Disk number. */ no = nstripe % sc->sc_ndisks; /* Start position in stripe. */ /* start = bp->bio_offset % stripesize; */ start = bp->bio_offset & (stripesize - 1); /* Start position in disk. */ /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; /* Length of data to operate. */ length = MIN(bp->bio_length, stripesize - start); /* * Do use "fast" mode when: * 1. "Fast" mode is ON. * and * 2. Request size is less than or equal to MAXPHYS, * which should always be true. * and * 3. Request size is bigger than stripesize * ndisks. If it isn't, * there will be no need to send more than one I/O request to * a provider, so there is nothing to optmize. * and * 4. Request is not unmapped. * and * 5. It is not a BIO_DELETE. 
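 *
 * A worked example of the address arithmetic above, for a purely
 * illustrative configuration of 3 disks and a 64 kB stripe
 * (sc_stripebits = 16), given a 128 kB request at byte offset 200 kB:
 *
 *	nstripe = 204800 >> 16              = 3
 *	no      = 3 % 3                     = 0     (starts on disk 0)
 *	start   = 204800 & 65535            = 8192  (8 kB into the stripe)
 *	offset  = ((3 / 3) << 16) + 8192    = 73728 (72 kB into disk 0)
 *	length  = min(131072, 65536 - 8192) = 57344
 *
 * Disk 0 therefore gets the first 56 kB at offset 72 kB, and the
 * remaining 72 kB is carved into a 64 kB chunk for disk 1 and an 8 kB
 * chunk for disk 2, both starting at offset 64 kB within their
 * providers.  If the criteria above are not met, or the fast path fails
 * to allocate its buffer, the request falls back to the economic path.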
*/ if (g_stripe_fast && bp->bio_length <= MAXPHYS && bp->bio_length >= stripesize * sc->sc_ndisks && (bp->bio_flags & BIO_UNMAPPED) == 0 && bp->bio_cmd != BIO_DELETE) { fast = 1; } error = 0; if (fast) { error = g_stripe_start_fast(bp, no, offset, length); if (error != 0) g_stripe_fast_failed++; } /* * Do use "economic" when: * 1. "Economic" mode is ON. * or * 2. "Fast" mode failed. It can only fail if there is no memory. */ if (!fast || error != 0) error = g_stripe_start_economic(bp, no, offset, length); if (error != 0) { if (bp->bio_error == 0) bp->bio_error = error; g_io_deliver(bp, bp->bio_error); } } static void g_stripe_check_and_run(struct g_stripe_softc *sc) { struct g_provider *dp; off_t mediasize, ms; u_int no, sectorsize = 0; g_topology_assert(); if (g_stripe_nvalid(sc) != sc->sc_ndisks) return; sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", sc->sc_name); sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (g_stripe_fast == 0) sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; /* * Find the smallest disk. */ mediasize = sc->sc_disks[0]->provider->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) mediasize -= sc->sc_disks[0]->provider->sectorsize; mediasize -= mediasize % sc->sc_stripesize; sectorsize = sc->sc_disks[0]->provider->sectorsize; for (no = 1; no < sc->sc_ndisks; no++) { dp = sc->sc_disks[no]->provider; ms = dp->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) ms -= dp->sectorsize; ms -= ms % sc->sc_stripesize; if (ms < mediasize) mediasize = ms; sectorsize = lcm(sectorsize, dp->sectorsize); /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_STRIPE_DEBUG(1, "Cancelling unmapped " "because of %s.", dp->name); sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; } } sc->sc_provider->sectorsize = sectorsize; sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; sc->sc_provider->stripesize = sc->sc_stripesize; sc->sc_provider->stripeoffset = 0; g_error_provider(sc->sc_provider, 0); G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); } static int g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ stripe_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) { struct g_consumer *cp, *fcp; struct g_geom *gp; int error; g_topology_assert(); /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); /* Check if disk is not already attached. */ if (sc->sc_disks[no] != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = no; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { struct g_stripe_metadata md; /* Reread metadata. 
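 * The label is read again here, after the consumer has been attached
 * and has inherited the geom's current access counts, to make sure the
 * provider still carries metadata for this very device: magic, name and
 * id must all still match, otherwise the attach is rejected as
 * "metadata changed".  This closes the window between tasting the
 * provider and actually wiring it into the stripe.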
*/ error = g_stripe_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } } sc->sc_disks[no] = cp; G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_stripe_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, u_int type) { struct g_stripe_softc *sc; struct g_geom *gp; u_int no; g_topology_assert(); G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* Two disks is minimum. */ if (md->md_all < 2) { G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); return (NULL); } #if 0 /* Stripe size have to be grater than or equal to sector size. */ if (md->md_stripesize < sectorsize) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } #endif /* Stripe size have to be power of 2. */ if (!powerof2(md->md_stripesize)) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_STRIPE_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); gp->start = g_stripe_start; gp->spoiled = g_stripe_orphan; gp->orphan = g_stripe_orphan; gp->access = g_stripe_access; gp->dumpconf = g_stripe_dumpconf; sc->sc_id = md->md_id; sc->sc_stripesize = md->md_stripesize; sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, M_STRIPE, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no] = NULL; sc->sc_type = type; mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) { struct g_provider *pp; struct g_consumer *cp, *cp1; struct g_geom *gp; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_STRIPE_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_STRIPE_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } gp = sc->sc_geom; LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { g_stripe_remove_disk(cp); if (cp1 == NULL) return (0); /* Recursion happened. */ } if (!LIST_EMPTY(&gp->consumer)) return (EINPROGRESS); gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? 
(device=%s)", gp->name)); free(sc->sc_disks, M_STRIPE); mtx_destroy(&sc->sc_lock); free(sc, M_STRIPE); G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_stripe_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_stripe_softc *sc; sc = gp->softc; return (g_stripe_destroy(sc, 0)); } static struct g_geom * g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_stripe_metadata md; struct g_stripe_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "stripe:taste"); gp->start = g_stripe_start; gp->access = g_stripe_access; gp->orphan = g_stripe_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_stripe_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) return (NULL); if (md.md_version > G_STRIPE_VERSION) { printf("geom_stripe.ko module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provider field in earlier versions of metadata. */ if (md.md_version < 2) bzero(md.md_provider, sizeof(md.md_provider)); /* There was no md_provsize field in earlier versions of metadata. */ if (md.md_version < 3) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. 
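 * As in the SHSEC taste path above, both the name and the id from the
 * on-disk label must match an existing geom; in addition only automatic
 * devices are considered, so a device assembled by hand with
 * "gstripe create" is never grown behind the administrator's back by a
 * tasted provider.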
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); if (gp == NULL) { G_STRIPE_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_stripe_destroy(sc, 1); return (NULL); } } return (gp); } static void g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) { u_int attached, no; struct g_stripe_metadata md; struct g_provider *pp; struct g_stripe_softc *sc; struct g_geom *gp; struct sbuf *sb; off_t *stripesize; const char *name; char param[16]; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 2) { gctl_error(req, "Too few arguments."); return; } strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); md.md_version = G_STRIPE_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_id = arc4random(); md.md_no = 0; md.md_all = *nargs - 1; stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "No '%s' argument.", "stripesize"); return; } md.md_stripesize = (uint32_t)*stripesize; bzero(md.md_provider, sizeof(md.md_provider)); /* This field is not important here. 
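 * (md_provsize only matters when an on-disk label is being validated in
 * the taste path; a manually created device is assembled from the
 * request arguments alone, so zero is fine here.)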
*/ md.md_provsize = 0; /* Check all providers are valid */ for (no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); gctl_error(req, "Disk %s is invalid.", name); return; } } gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't configure %s.", md.md_name); return; } sc = gp->softc; sb = sbuf_new_auto(); sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); for (attached = 0, no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); continue; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); KASSERT(pp != NULL, ("Provider %s disappear?!", name)); if (g_stripe_add_disk(sc, pp, no - 1) != 0) { G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", no, pp->name, gp->name); sbuf_printf(sb, " %s", pp->name); continue; } attached++; } sbuf_finish(sb); if (md.md_all != attached) { g_stripe_destroy(gp->softc, 1); gctl_error(req, "%s", sbuf_data(sb)); } sbuf_delete(sb); } static struct g_stripe_softc * g_stripe_find_device(struct g_class *mp, const char *name) { struct g_stripe_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) == 0) return (sc); } return (NULL); } static void g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_stripe_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_stripe_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_stripe_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_STRIPE_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_stripe_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_stripe_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_stripe_softc *sc; sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { sbuf_printf(sb, "%s%u\n", indent, (u_int)cp->index); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_stripesize); sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_STRIPE_TYPE_AUTOMATIC: sbuf_cat(sb, "AUTOMATIC"); break; case G_STRIPE_TYPE_MANUAL: sbuf_cat(sb, "MANUAL"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_stripe_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_cat(sb, "UP"); else sbuf_cat(sb, "DOWN"); sbuf_cat(sb, "\n"); } } DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); MODULE_VERSION(geom_stripe, 0); Index: head/sys/geom/stripe/g_stripe.h =================================================================== --- head/sys/geom/stripe/g_stripe.h (revision 350693) +++ head/sys/geom/stripe/g_stripe.h (revision 350694) @@ -1,126 +1,111 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_STRIPE_H_ #define _G_STRIPE_H_ #include #define G_STRIPE_CLASS_NAME "STRIPE" #define G_STRIPE_MAGIC "GEOM::STRIPE" /* * Version history: * 0 - Initial version number. * 1 - Added 'stop' command for gstripe(8). * 2 - Added md_provider field to metadata and '-h' option for gstripe(8). * 3 - Added md_provsize field to metadata. */ #define G_STRIPE_VERSION 3 #ifdef _KERNEL #define G_STRIPE_TYPE_MANUAL 0 #define G_STRIPE_TYPE_AUTOMATIC 1 -#define G_STRIPE_DEBUG(lvl, ...) do { \ - if (g_stripe_debug >= (lvl)) { \ - printf("GEOM_STRIPE"); \ - if (g_stripe_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) -#define G_STRIPE_LOGREQ(bp, ...) do { \ - if (g_stripe_debug >= 2) { \ - printf("GEOM_STRIPE[2]: "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_STRIPE_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_STRIPE", g_stripe_debug, (lvl), NULL, __VA_ARGS__) +#define G_STRIPE_LOGREQ(bp, ...) 
\ + _GEOM_DEBUG("GEOM_STRIPE", g_stripe_debug, 2, (bp), __VA_ARGS__) struct g_stripe_softc { u_int sc_type; /* provider type */ struct g_geom *sc_geom; struct g_provider *sc_provider; uint32_t sc_id; /* stripe unique ID */ struct g_consumer **sc_disks; uint16_t sc_ndisks; off_t sc_stripesize; uint32_t sc_stripebits; struct mtx sc_lock; }; #define sc_name sc_geom->name #endif /* _KERNEL */ struct g_stripe_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ char md_name[16]; /* Stripe name. */ uint32_t md_id; /* Unique ID. */ uint16_t md_no; /* Disk number. */ uint16_t md_all; /* Number of all disks. */ uint32_t md_stripesize; /* Stripe size. */ char md_provider[16]; /* Hardcoded provider. */ uint64_t md_provsize; /* Provider's size. */ }; static __inline void stripe_metadata_encode(const struct g_stripe_metadata *md, u_char *data) { bcopy(md->md_magic, data, sizeof(md->md_magic)); le32enc(data + 16, md->md_version); bcopy(md->md_name, data + 20, sizeof(md->md_name)); le32enc(data + 36, md->md_id); le16enc(data + 40, md->md_no); le16enc(data + 42, md->md_all); le32enc(data + 44, md->md_stripesize); bcopy(md->md_provider, data + 48, sizeof(md->md_provider)); le64enc(data + 64, md->md_provsize); } static __inline void stripe_metadata_decode(const u_char *data, struct g_stripe_metadata *md) { bcopy(data, md->md_magic, sizeof(md->md_magic)); md->md_version = le32dec(data + 16); bcopy(data + 20, md->md_name, sizeof(md->md_name)); md->md_id = le32dec(data + 36); md->md_no = le16dec(data + 40); md->md_all = le16dec(data + 42); md->md_stripesize = le32dec(data + 44); bcopy(data + 48, md->md_provider, sizeof(md->md_provider)); md->md_provsize = le64dec(data + 64); } #endif /* _G_STRIPE_H_ */ Index: head/sys/geom/vinum/geom_vinum.c =================================================================== --- head/sys/geom/vinum/geom_vinum.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum.c (revision 350694) @@ -1,1051 +1,1052 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW, 0, "GEOM_VINUM stuff"); u_int g_vinum_debug = 0; SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RWTUN, &g_vinum_debug, 0, "Debug level"); static int gv_create(struct g_geom *, struct gctl_req *); static void gv_attach(struct gv_softc *, struct gctl_req *); static void gv_detach(struct gv_softc *, struct gctl_req *); static void gv_parityop(struct gv_softc *, struct gctl_req *); static void gv_orphan(struct g_consumer *cp) { struct g_geom *gp; struct gv_softc *sc; struct gv_drive *d; g_topology_assert(); KASSERT(cp != NULL, ("gv_orphan: null cp")); gp = cp->geom; KASSERT(gp != NULL, ("gv_orphan: null gp")); sc = gp->softc; KASSERT(sc != NULL, ("gv_orphan: null sc")); d = cp->private; KASSERT(d != NULL, ("gv_orphan: null d")); g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", gp->name); gv_post_event(sc, GV_EVENT_DRIVE_LOST, d, NULL, 0, 0); } void gv_start(struct bio *bp) { struct g_geom *gp; struct gv_softc *sc; gp = bp->bio_to->geom; sc = gp->softc; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_GETATTR: default: g_io_deliver(bp, EOPNOTSUPP); return; } mtx_lock(&sc->bqueue_mtx); bioq_disksort(sc->bqueue_down, bp); wakeup(sc); mtx_unlock(&sc->bqueue_mtx); } void gv_done(struct bio *bp) { struct g_geom *gp; struct gv_softc *sc; KASSERT(bp != NULL, ("NULL bp")); gp = bp->bio_from->geom; sc = gp->softc; mtx_lock(&sc->bqueue_mtx); bioq_disksort(sc->bqueue_up, bp); wakeup(sc); mtx_unlock(&sc->bqueue_mtx); } int gv_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct gv_softc *sc; struct gv_drive *d, *d2; int error; gp = pp->geom; sc = gp->softc; /* * We want to modify the read count with the write count in case we have * plexes in a RAID-5 organization. 
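 * A write to a RAID-5 plex is a read-modify-write: the other columns of
 * the stripe have to be read back so the parity block can be
 * recomputed.  Folding the write count into the read count up front
 * keeps every underlying drive readable whenever it is writable; for
 * example, a plain write open of the volume, g_access(pp, 0, 1, 0),
 * reaches each drive consumer below as g_access(cp, 1, 1, 0).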
*/ dr += dw; LIST_FOREACH(d, &sc->drives, drive) { if (d->consumer == NULL) continue; error = g_access(d->consumer, dr, dw, de); if (error) { LIST_FOREACH(d2, &sc->drives, drive) { if (d == d2) break; g_access(d2->consumer, -dr, -dw, -de); } G_VINUM_DEBUG(0, "g_access '%s' failed: %d", d->name, error); return (error); } } return (0); } static void gv_init(struct g_class *mp) { struct g_geom *gp; struct gv_softc *sc; g_trace(G_T_TOPOLOGY, "gv_init(%p)", mp); gp = g_new_geomf(mp, "VINUM"); gp->spoiled = gv_orphan; gp->orphan = gv_orphan; gp->access = gv_access; gp->start = gv_start; gp->softc = g_malloc(sizeof(struct gv_softc), M_WAITOK | M_ZERO); sc = gp->softc; sc->geom = gp; sc->bqueue_down = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); sc->bqueue_up = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(sc->bqueue_down); bioq_init(sc->bqueue_up); LIST_INIT(&sc->drives); LIST_INIT(&sc->subdisks); LIST_INIT(&sc->plexes); LIST_INIT(&sc->volumes); TAILQ_INIT(&sc->equeue); mtx_init(&sc->config_mtx, "gv_config", NULL, MTX_DEF); mtx_init(&sc->equeue_mtx, "gv_equeue", NULL, MTX_DEF); mtx_init(&sc->bqueue_mtx, "gv_bqueue", NULL, MTX_DEF); kproc_create(gv_worker, sc, &sc->worker, 0, 0, "gv_worker"); } static int gv_unload(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct gv_softc *sc; g_trace(G_T_TOPOLOGY, "gv_unload(%p)", mp); g_topology_assert(); sc = gp->softc; if (sc != NULL) { gv_worker_exit(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); } return (0); } /* Handle userland request of attaching object. */ static void gv_attach(struct gv_softc *sc, struct gctl_req *req) { struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; off_t *offset; int *rename, type_child, type_parent; char *child, *parent; child = gctl_get_param(req, "child", NULL); if (child == NULL) { gctl_error(req, "no child given"); return; } parent = gctl_get_param(req, "parent", NULL); if (parent == NULL) { gctl_error(req, "no parent given"); return; } offset = gctl_get_paraml(req, "offset", sizeof(*offset)); if (offset == NULL) { gctl_error(req, "no offset given"); return; } rename = gctl_get_paraml(req, "rename", sizeof(*rename)); if (rename == NULL) { gctl_error(req, "no rename flag given"); return; } type_child = gv_object_type(sc, child); type_parent = gv_object_type(sc, parent); switch (type_child) { case GV_TYPE_PLEX: if (type_parent != GV_TYPE_VOL) { gctl_error(req, "no such volume to attach to"); return; } v = gv_find_vol(sc, parent); p = gv_find_plex(sc, child); gv_post_event(sc, GV_EVENT_ATTACH_PLEX, p, v, *offset, *rename); break; case GV_TYPE_SD: if (type_parent != GV_TYPE_PLEX) { gctl_error(req, "no such plex to attach to"); return; } p = gv_find_plex(sc, parent); s = gv_find_sd(sc, child); gv_post_event(sc, GV_EVENT_ATTACH_SD, s, p, *offset, *rename); break; default: gctl_error(req, "invalid child type"); break; } } /* Handle userland request of detaching object. 
*/ static void gv_detach(struct gv_softc *sc, struct gctl_req *req) { struct gv_plex *p; struct gv_sd *s; int *flags, type; char *object; object = gctl_get_param(req, "object", NULL); if (object == NULL) { gctl_error(req, "no argument given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); type = gv_object_type(sc, object); switch (type) { case GV_TYPE_PLEX: p = gv_find_plex(sc, object); gv_post_event(sc, GV_EVENT_DETACH_PLEX, p, NULL, *flags, 0); break; case GV_TYPE_SD: s = gv_find_sd(sc, object); gv_post_event(sc, GV_EVENT_DETACH_SD, s, NULL, *flags, 0); break; default: gctl_error(req, "invalid object type"); break; } } /* Handle userland requests for creating new objects. */ static int gv_create(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_drive *d, *d2; struct gv_plex *p, *p2; struct gv_sd *s, *s2; struct gv_volume *v, *v2; struct g_provider *pp; int error, i, *drives, *flags, *plexes, *subdisks, *volumes; char buf[20]; g_topology_assert(); sc = gp->softc; /* Find out how many of each object have been passed in. */ volumes = gctl_get_paraml(req, "volumes", sizeof(*volumes)); plexes = gctl_get_paraml(req, "plexes", sizeof(*plexes)); subdisks = gctl_get_paraml(req, "subdisks", sizeof(*subdisks)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (volumes == NULL || plexes == NULL || subdisks == NULL || drives == NULL) { gctl_error(req, "number of objects not given"); return (-1); } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "flags not given"); return (-1); } /* First, handle drive definitions ... */ for (i = 0; i < *drives; i++) { snprintf(buf, sizeof(buf), "drive%d", i); d2 = gctl_get_paraml(req, buf, sizeof(*d2)); if (d2 == NULL) { gctl_error(req, "no drive definition given"); return (-1); } /* * Make sure that the device specified in the drive config is * an active GEOM provider. */ pp = g_provider_by_name(d2->device); if (pp == NULL) { gctl_error(req, "%s: device not found", d2->device); goto error; } if (gv_find_drive(sc, d2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "drive '%s' already exists", d2->name); goto error; } if (gv_find_drive_device(sc, d2->device) != NULL) { gctl_error(req, "device '%s' already configured in " "gvinum", d2->device); goto error; } d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); bcopy(d2, d, sizeof(*d)); gv_post_event(sc, GV_EVENT_CREATE_DRIVE, d, NULL, 0, 0); } /* ... then volume definitions ... */ for (i = 0; i < *volumes; i++) { error = 0; snprintf(buf, sizeof(buf), "volume%d", i); v2 = gctl_get_paraml(req, buf, sizeof(*v2)); if (v2 == NULL) { gctl_error(req, "no volume definition given"); return (-1); } if (gv_find_vol(sc, v2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "volume '%s' already exists", v2->name); goto error; } v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); bcopy(v2, v, sizeof(*v)); gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); } /* ... then plex definitions ... */ for (i = 0; i < *plexes; i++) { error = 0; snprintf(buf, sizeof(buf), "plex%d", i); p2 = gctl_get_paraml(req, buf, sizeof(*p2)); if (p2 == NULL) { gctl_error(req, "no plex definition given"); return (-1); } if (gv_find_plex(sc, p2->name) != NULL) { /* Ignore error. 
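 * (GV_FLAG_F, the force flag, turns "already exists" into a silent skip
 * for drives, volumes, plexes and subdisks alike, so an existing
 * configuration can be re-applied without erroring out.)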
*/ if (*flags & GV_FLAG_F) continue; gctl_error(req, "plex '%s' already exists", p2->name); goto error; } p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); bcopy(p2, p, sizeof(*p)); gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); } /* ... and, finally, subdisk definitions. */ for (i = 0; i < *subdisks; i++) { error = 0; snprintf(buf, sizeof(buf), "sd%d", i); s2 = gctl_get_paraml(req, buf, sizeof(*s2)); if (s2 == NULL) { gctl_error(req, "no subdisk definition given"); return (-1); } if (gv_find_sd(sc, s2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "sd '%s' already exists", s2->name); goto error; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); bcopy(s2, s, sizeof(*s)); gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } error: gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); return (0); } static void gv_config(struct gctl_req *req, struct g_class *mp, char const *verb) { struct g_geom *gp; struct gv_softc *sc; struct sbuf *sb; char *comment; g_topology_assert(); gp = LIST_FIRST(&mp->geom); sc = gp->softc; if (!strcmp(verb, "attach")) { gv_attach(sc, req); } else if (!strcmp(verb, "concat")) { gv_concat(gp, req); } else if (!strcmp(verb, "detach")) { gv_detach(sc, req); } else if (!strcmp(verb, "list")) { gv_list(gp, req); /* Save our configuration back to disk. */ } else if (!strcmp(verb, "saveconfig")) { gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); /* Return configuration in string form. */ } else if (!strcmp(verb, "getconfig")) { comment = gctl_get_param(req, "comment", NULL); if (comment == NULL) { gctl_error(req, "no comment parameter given"); return; } sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); gv_format_config(sc, sb, 0, comment); sbuf_finish(sb); gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } else if (!strcmp(verb, "create")) { gv_create(gp, req); } else if (!strcmp(verb, "mirror")) { gv_mirror(gp, req); } else if (!strcmp(verb, "move")) { gv_move(gp, req); } else if (!strcmp(verb, "raid5")) { gv_raid5(gp, req); } else if (!strcmp(verb, "rebuildparity") || !strcmp(verb, "checkparity")) { gv_parityop(sc, req); } else if (!strcmp(verb, "remove")) { gv_remove(gp, req); } else if (!strcmp(verb, "rename")) { gv_rename(gp, req); } else if (!strcmp(verb, "resetconfig")) { gv_post_event(sc, GV_EVENT_RESET_CONFIG, sc, NULL, 0, 0); } else if (!strcmp(verb, "start")) { gv_start_obj(gp, req); } else if (!strcmp(verb, "stripe")) { gv_stripe(gp, req); } else if (!strcmp(verb, "setstate")) { gv_setstate(gp, req); } else gctl_error(req, "Unknown verb parameter"); } static void gv_parityop(struct gv_softc *sc, struct gctl_req *req) { struct gv_plex *p; int *flags, *rebuild, type; char *plex; plex = gctl_get_param(req, "plex", NULL); if (plex == NULL) { gctl_error(req, "no plex given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "no flags given"); return; } rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild)); if (rebuild == NULL) { gctl_error(req, "no operation given"); return; } type = gv_object_type(sc, plex); if (type != GV_TYPE_PLEX) { gctl_error(req, "'%s' is not a plex", plex); return; } p = gv_find_plex(sc, plex); if (p->state != GV_PLEX_UP) { gctl_error(req, "plex %s is not completely accessible", p->name); return; } if (p->org != GV_PLEX_RAID5) { gctl_error(req, "plex %s is not a RAID5 plex", p->name); return; } /* Put it in the event queue. 
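 * Control requests are not executed in the caller's context: they are
 * posted to the event queue and handled asynchronously by the
 * gv_worker() kthread, so the checks above are only a snapshot taken at
 * submission time.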
*/ /* XXX: The state of the plex might have changed when this event is * picked up ... We should perhaps check this afterwards. */ if (*rebuild) gv_post_event(sc, GV_EVENT_PARITY_REBUILD, p, NULL, 0, 0); else gv_post_event(sc, GV_EVENT_PARITY_CHECK, p, NULL, 0, 0); } static struct g_geom * gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_geom *gp; struct g_consumer *cp; struct gv_softc *sc; struct gv_hdr vhdr; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name); gp = LIST_FIRST(&mp->geom); if (gp == NULL) { G_VINUM_DEBUG(0, "error: tasting, but not initialized?"); return (NULL); } sc = gp->softc; cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); return (NULL); } if (g_access(cp, 1, 0, 0) != 0) { g_detach(cp); g_destroy_consumer(cp); return (NULL); } g_topology_unlock(); error = gv_read_header(cp, &vhdr); g_topology_lock(); g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); /* Check if what we've been given is a valid vinum drive. */ if (!error) gv_post_event(sc, GV_EVENT_DRIVE_TASTED, pp, NULL, 0, 0); return (NULL); } void gv_worker(void *arg) { struct g_provider *pp; struct gv_softc *sc; struct gv_event *ev; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; struct gv_drive *d; struct bio *bp; int newstate, flags, err, rename; char *newname; off_t offset; sc = arg; KASSERT(sc != NULL, ("NULL sc")); for (;;) { /* Look at the events first... */ ev = gv_get_event(sc); if (ev != NULL) { gv_remove_event(sc, ev); switch (ev->type) { case GV_EVENT_DRIVE_TASTED: G_VINUM_DEBUG(2, "event 'drive tasted'"); pp = ev->arg1; gv_drive_tasted(sc, pp); break; case GV_EVENT_DRIVE_LOST: G_VINUM_DEBUG(2, "event 'drive lost'"); d = ev->arg1; gv_drive_lost(sc, d); break; case GV_EVENT_CREATE_DRIVE: G_VINUM_DEBUG(2, "event 'create drive'"); d = ev->arg1; gv_create_drive(sc, d); break; case GV_EVENT_CREATE_VOLUME: G_VINUM_DEBUG(2, "event 'create volume'"); v = ev->arg1; gv_create_volume(sc, v); break; case GV_EVENT_CREATE_PLEX: G_VINUM_DEBUG(2, "event 'create plex'"); p = ev->arg1; gv_create_plex(sc, p); break; case GV_EVENT_CREATE_SD: G_VINUM_DEBUG(2, "event 'create sd'"); s = ev->arg1; gv_create_sd(sc, s); break; case GV_EVENT_RM_DRIVE: G_VINUM_DEBUG(2, "event 'remove drive'"); d = ev->arg1; flags = ev->arg3; gv_rm_drive(sc, d, flags); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_VOLUME: G_VINUM_DEBUG(2, "event 'remove volume'"); v = ev->arg1; gv_rm_vol(sc, v); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_PLEX: G_VINUM_DEBUG(2, "event 'remove plex'"); p = ev->arg1; gv_rm_plex(sc, p); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_SD: G_VINUM_DEBUG(2, "event 'remove sd'"); s = ev->arg1; gv_rm_sd(sc, s); /*gv_setup_objects(sc);*/ break; case GV_EVENT_SAVE_CONFIG: G_VINUM_DEBUG(2, "event 'save config'"); gv_save_config(sc); break; case GV_EVENT_SET_SD_STATE: G_VINUM_DEBUG(2, "event 'setstate sd'"); s = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_sd_state(s, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting subdisk" " state: error code %d", err); break; case GV_EVENT_SET_DRIVE_STATE: G_VINUM_DEBUG(2, "event 'setstate drive'"); d = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_drive_state(d, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting drive " "state: error code %d", err); break; case GV_EVENT_SET_VOL_STATE: G_VINUM_DEBUG(2, "event 'setstate volume'"); v = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = 
gv_set_vol_state(v, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting volume " "state: error code %d", err); break; case GV_EVENT_SET_PLEX_STATE: G_VINUM_DEBUG(2, "event 'setstate plex'"); p = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_plex_state(p, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting plex " "state: error code %d", err); break; case GV_EVENT_SETUP_OBJECTS: G_VINUM_DEBUG(2, "event 'setup objects'"); gv_setup_objects(sc); break; case GV_EVENT_RESET_CONFIG: G_VINUM_DEBUG(2, "event 'resetconfig'"); err = gv_resetconfig(sc); if (err) G_VINUM_DEBUG(0, "error resetting " "config: error code %d", err); break; case GV_EVENT_PARITY_REBUILD: /* * Start the rebuild. The gv_plex_done will * handle issuing of the remaining rebuild bio's * until it's finished. */ G_VINUM_DEBUG(2, "event 'rebuild'"); p = ev->arg1; if (p->state != GV_PLEX_UP) { G_VINUM_DEBUG(0, "plex %s is not " "completely accessible", p->name); break; } if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) { G_VINUM_DEBUG(0, "plex %s is busy with " "syncing or parity build", p->name); break; } p->synced = 0; p->flags |= GV_PLEX_REBUILDING; g_topology_assert_not(); g_topology_lock(); err = gv_access(p->vol_sc->provider, 1, 1, 0); if (err) { G_VINUM_DEBUG(0, "unable to access " "provider"); break; } g_topology_unlock(); gv_parity_request(p, GV_BIO_CHECK | GV_BIO_PARITY, 0); break; case GV_EVENT_PARITY_CHECK: /* Start parity check. */ G_VINUM_DEBUG(2, "event 'check'"); p = ev->arg1; if (p->state != GV_PLEX_UP) { G_VINUM_DEBUG(0, "plex %s is not " "completely accessible", p->name); break; } if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) { G_VINUM_DEBUG(0, "plex %s is busy with " "syncing or parity build", p->name); break; } p->synced = 0; g_topology_assert_not(); g_topology_lock(); err = gv_access(p->vol_sc->provider, 1, 1, 0); if (err) { G_VINUM_DEBUG(0, "unable to access " "provider"); break; } g_topology_unlock(); gv_parity_request(p, GV_BIO_CHECK, 0); break; case GV_EVENT_START_PLEX: G_VINUM_DEBUG(2, "event 'start' plex"); p = ev->arg1; gv_start_plex(p); break; case GV_EVENT_START_VOLUME: G_VINUM_DEBUG(2, "event 'start' volume"); v = ev->arg1; gv_start_vol(v); break; case GV_EVENT_ATTACH_PLEX: G_VINUM_DEBUG(2, "event 'attach' plex"); p = ev->arg1; v = ev->arg2; rename = ev->arg4; err = gv_attach_plex(p, v, rename); if (err) G_VINUM_DEBUG(0, "error attaching %s to" " %s: error code %d", p->name, v->name, err); break; case GV_EVENT_ATTACH_SD: G_VINUM_DEBUG(2, "event 'attach' sd"); s = ev->arg1; p = ev->arg2; offset = ev->arg3; rename = ev->arg4; err = gv_attach_sd(s, p, offset, rename); if (err) G_VINUM_DEBUG(0, "error attaching %s to" " %s: error code %d", s->name, p->name, err); break; case GV_EVENT_DETACH_PLEX: G_VINUM_DEBUG(2, "event 'detach' plex"); p = ev->arg1; flags = ev->arg3; err = gv_detach_plex(p, flags); if (err) G_VINUM_DEBUG(0, "error detaching %s: " "error code %d", p->name, err); break; case GV_EVENT_DETACH_SD: G_VINUM_DEBUG(2, "event 'detach' sd"); s = ev->arg1; flags = ev->arg3; err = gv_detach_sd(s, flags); if (err) G_VINUM_DEBUG(0, "error detaching %s: " "error code %d", s->name, err); break; case GV_EVENT_RENAME_VOL: G_VINUM_DEBUG(2, "event 'rename' volume"); v = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_vol(sc, v, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", v->name, newname, err); g_free(newname); /* Destroy and 
recreate the provider if we can. */ if (gv_provider_is_open(v->provider)) { G_VINUM_DEBUG(0, "unable to rename " "provider to %s: provider in use", v->name); break; } g_topology_lock(); g_wither_provider(v->provider, ENOENT); g_topology_unlock(); v->provider = NULL; gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); break; case GV_EVENT_RENAME_PLEX: G_VINUM_DEBUG(2, "event 'rename' plex"); p = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_plex(sc, p, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", p->name, newname, err); g_free(newname); break; case GV_EVENT_RENAME_SD: G_VINUM_DEBUG(2, "event 'rename' sd"); s = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_sd(sc, s, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", s->name, newname, err); g_free(newname); break; case GV_EVENT_RENAME_DRIVE: G_VINUM_DEBUG(2, "event 'rename' drive"); d = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_drive(sc, d, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", d->name, newname, err); g_free(newname); break; case GV_EVENT_MOVE_SD: G_VINUM_DEBUG(2, "event 'move' sd"); s = ev->arg1; d = ev->arg2; flags = ev->arg3; err = gv_move_sd(sc, s, d, flags); if (err) G_VINUM_DEBUG(0, "error moving %s to " "%s: error code %d", s->name, d->name, err); break; case GV_EVENT_THREAD_EXIT: G_VINUM_DEBUG(2, "event 'thread exit'"); g_free(ev); mtx_lock(&sc->equeue_mtx); mtx_lock(&sc->bqueue_mtx); gv_cleanup(sc); mtx_destroy(&sc->bqueue_mtx); mtx_destroy(&sc->equeue_mtx); g_free(sc->bqueue_down); g_free(sc->bqueue_up); g_free(sc); kproc_exit(0); /* NOTREACHED */ default: G_VINUM_DEBUG(1, "unknown event %d", ev->type); } g_free(ev); continue; } /* ... then do I/O processing. */ mtx_lock(&sc->bqueue_mtx); /* First do new requests. */ bp = bioq_takefirst(sc->bqueue_down); if (bp != NULL) { mtx_unlock(&sc->bqueue_mtx); /* A bio that interfered with another bio. */ if (bp->bio_pflags & GV_BIO_ONHOLD) { s = bp->bio_caller1; p = s->plex_sc; /* Is it still locked out? */ if (gv_stripe_active(p, bp)) { /* Park the bio on the waiting queue. */ bioq_disksort(p->wqueue, bp); } else { bp->bio_pflags &= ~GV_BIO_ONHOLD; g_io_request(bp, s->drive_sc->consumer); } /* A special request requireing special handling. */ } else if (bp->bio_pflags & GV_BIO_INTERNAL) { p = bp->bio_caller1; gv_plex_start(p, bp); } else { gv_volume_start(sc, bp); } mtx_lock(&sc->bqueue_mtx); } /* Then do completed requests. */ bp = bioq_takefirst(sc->bqueue_up); if (bp == NULL) { msleep(sc, &sc->bqueue_mtx, PRIBIO, "-", hz/10); mtx_unlock(&sc->bqueue_mtx); continue; } mtx_unlock(&sc->bqueue_mtx); gv_bio_done(sc, bp); } } #define VINUM_CLASS_NAME "VINUM" static struct g_class g_vinum_class = { .name = VINUM_CLASS_NAME, .version = G_VERSION, .init = gv_init, .taste = gv_taste, .ctlreq = gv_config, .destroy_geom = gv_unload, }; DECLARE_GEOM_CLASS(g_vinum_class, g_vinum); MODULE_VERSION(geom_vinum, 0); Index: head/sys/geom/vinum/geom_vinum.h =================================================================== --- head/sys/geom/vinum/geom_vinum.h (revision 350693) +++ head/sys/geom/vinum/geom_vinum.h (revision 350694) @@ -1,184 +1,165 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _GEOM_VINUM_H_ #define _GEOM_VINUM_H_ /* geom_vinum_create.c */ void gv_concat(struct g_geom *gp, struct gctl_req *); void gv_mirror(struct g_geom *gp, struct gctl_req *); void gv_stripe(struct g_geom *gp, struct gctl_req *); void gv_raid5(struct g_geom *gp, struct gctl_req *); int gv_create_drive(struct gv_softc *, struct gv_drive *); int gv_create_volume(struct gv_softc *, struct gv_volume *); int gv_create_plex(struct gv_softc *, struct gv_plex *); int gv_create_sd(struct gv_softc *, struct gv_sd *); /* geom_vinum_drive.c */ void gv_save_config(struct gv_softc *); int gv_read_header(struct g_consumer *, struct gv_hdr *); int gv_write_header(struct g_consumer *, struct gv_hdr *); /* geom_vinum_init.c */ void gv_start_obj(struct g_geom *, struct gctl_req *); int gv_start_plex(struct gv_plex *); int gv_start_vol(struct gv_volume *); /* geom_vinum_list.c */ void gv_ld(struct g_geom *, struct gctl_req *, struct sbuf *); void gv_lp(struct g_geom *, struct gctl_req *, struct sbuf *); void gv_ls(struct g_geom *, struct gctl_req *, struct sbuf *); void gv_lv(struct g_geom *, struct gctl_req *, struct sbuf *); void gv_list(struct g_geom *, struct gctl_req *); /* geom_vinum_move.c */ void gv_move(struct g_geom *, struct gctl_req *); int gv_move_sd(struct gv_softc *, struct gv_sd *, struct gv_drive *, int); /* geom_vinum_rename.c */ void gv_rename(struct g_geom *, struct gctl_req *); int gv_rename_drive(struct gv_softc *, struct gv_drive *, char *, int); int gv_rename_plex(struct gv_softc *, struct gv_plex *, char *, int); int gv_rename_sd(struct gv_softc *, struct gv_sd *, char *, int); int gv_rename_vol(struct gv_softc *, struct gv_volume *, char *, int); /* geom_vinum_rm.c */ void gv_remove(struct g_geom *, struct gctl_req *); int gv_resetconfig(struct gv_softc *); void gv_rm_sd(struct gv_softc *sc, struct gv_sd *s); void gv_rm_drive(struct gv_softc *, struct gv_drive *, int); void gv_rm_plex(struct gv_softc *, struct gv_plex *); void gv_rm_vol(struct gv_softc *, struct gv_volume *); /* geom_vinum_state.c */ int gv_sdstatemap(struct gv_plex *); void gv_setstate(struct g_geom *, struct gctl_req *); int gv_set_drive_state(struct gv_drive *, int, int); int gv_set_sd_state(struct gv_sd *, int, int); int gv_set_vol_state(struct gv_volume *, int, int); int gv_set_plex_state(struct gv_plex *, int, int); void gv_update_sd_state(struct gv_sd *); void gv_update_plex_state(struct gv_plex *); void 
gv_update_vol_state(struct gv_volume *); /* geom_vinum_subr.c */ void gv_adjust_freespace(struct gv_sd *, off_t); void gv_free_sd(struct gv_sd *); struct gv_drive *gv_find_drive(struct gv_softc *, char *); struct gv_drive *gv_find_drive_device(struct gv_softc *, char *); struct gv_plex *gv_find_plex(struct gv_softc *, char *); struct gv_sd *gv_find_sd(struct gv_softc *, char *); struct gv_volume *gv_find_vol(struct gv_softc *, char *); void gv_format_config(struct gv_softc *, struct sbuf *, int, char *); int gv_is_striped(struct gv_plex *); int gv_consumer_is_open(struct g_consumer *); int gv_provider_is_open(struct g_provider *); int gv_object_type(struct gv_softc *, char *); void gv_parse_config(struct gv_softc *, char *, struct gv_drive *); int gv_sd_to_drive(struct gv_sd *, struct gv_drive *); int gv_sd_to_plex(struct gv_sd *, struct gv_plex *); int gv_sdcount(struct gv_plex *, int); void gv_update_plex_config(struct gv_plex *); void gv_update_vol_size(struct gv_volume *, off_t); off_t gv_vol_size(struct gv_volume *); off_t gv_plex_size(struct gv_plex *); int gv_plexdown(struct gv_volume *); int gv_attach_plex(struct gv_plex *, struct gv_volume *, int); int gv_attach_sd(struct gv_sd *, struct gv_plex *, off_t, int); int gv_detach_plex(struct gv_plex *, int); int gv_detach_sd(struct gv_sd *, int); /* geom_vinum.c */ void gv_worker(void *); void gv_post_event(struct gv_softc *, int, void *, void *, intmax_t, intmax_t); void gv_worker_exit(struct gv_softc *); struct gv_event *gv_get_event(struct gv_softc *); void gv_remove_event(struct gv_softc *, struct gv_event *); void gv_drive_tasted(struct gv_softc *, struct g_provider *); void gv_drive_lost(struct gv_softc *, struct gv_drive *); void gv_setup_objects(struct gv_softc *); void gv_start(struct bio *); int gv_access(struct g_provider *, int, int, int); void gv_cleanup(struct gv_softc *); /* geom_vinum_volume.c */ void gv_done(struct bio *); void gv_volume_start(struct gv_softc *, struct bio *); void gv_volume_flush(struct gv_volume *); void gv_bio_done(struct gv_softc *, struct bio *); /* geom_vinum_plex.c */ void gv_plex_start(struct gv_plex *, struct bio *); void gv_plex_raid5_done(struct gv_plex *, struct bio *); void gv_plex_normal_done(struct gv_plex *, struct bio *); int gv_grow_request(struct gv_plex *, off_t, off_t, int, caddr_t); void gv_grow_complete(struct gv_plex *, struct bio *); void gv_init_request(struct gv_sd *, off_t, caddr_t, off_t); void gv_init_complete(struct gv_plex *, struct bio *); void gv_parity_request(struct gv_plex *, int, off_t); void gv_parity_complete(struct gv_plex *, struct bio *); void gv_rebuild_complete(struct gv_plex *, struct bio *); int gv_sync_request(struct gv_plex *, struct gv_plex *, off_t, off_t, int, caddr_t); int gv_sync_complete(struct gv_plex *, struct bio *); extern u_int g_vinum_debug; -#define G_VINUM_DEBUG(lvl, ...) do { \ - if (g_vinum_debug >= (lvl)) { \ - printf("GEOM_VINUM"); \ - if (g_vinum_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) - -#define G_VINUM_LOGREQ(lvl, bp, ...) do { \ - if (g_vinum_debug >= (lvl)) { \ - printf("GEOM_VINUM"); \ - if (g_vinum_debug > 0) \ - printf("[%u]", lvl); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define G_VINUM_DEBUG(lvl, ...) \ + _GEOM_DEBUG("GEOM_VINUM", g_vinum_debug, (lvl), NULL, __VA_ARGS__) +#define G_VINUM_LOGREQ(lvl, bp, ...) 
\ + _GEOM_DEBUG("GEOM_VINUM", g_vinum_debug, (lvl), (bp), __VA_ARGS__) #endif /* !_GEOM_VINUM_H_ */ Index: head/sys/geom/vinum/geom_vinum_create.c =================================================================== --- head/sys/geom/vinum/geom_vinum_create.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_create.c (revision 350694) @@ -1,612 +1,613 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include +#include #include #include #define DEFAULT_STRIPESIZE 262144 /* * Create a new drive object, either by user request, during taste of the drive * itself, or because it was referenced by a subdisk during taste. */ int gv_create_drive(struct gv_softc *sc, struct gv_drive *d) { struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp, *cp2; struct gv_drive *d2; struct gv_hdr *hdr; struct gv_freelist *fl; KASSERT(d != NULL, ("gv_create_drive: NULL d")); gp = sc->geom; pp = NULL; cp = cp2 = NULL; /* The drive already has a consumer if it was tasted before. */ if (d->consumer != NULL) { cp = d->consumer; cp->private = d; pp = cp->provider; } else if (!(d->flags & GV_DRIVE_REFERENCED)) { if (gv_find_drive(sc, d->name) != NULL) { G_VINUM_DEBUG(0, "drive '%s' already exists", d->name); g_free(d); return (GV_ERR_CREATE); } if (gv_find_drive_device(sc, d->device) != NULL) { G_VINUM_DEBUG(0, "provider '%s' already in use by " "gvinum", d->device); return (GV_ERR_CREATE); } pp = g_provider_by_name(d->device); if (pp == NULL) { G_VINUM_DEBUG(0, "create '%s': device '%s' disappeared", d->name, d->device); g_free(d); return (GV_ERR_CREATE); } g_topology_lock(); cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); g_topology_unlock(); G_VINUM_DEBUG(0, "create drive '%s': unable to attach", d->name); g_free(d); return (GV_ERR_CREATE); } g_topology_unlock(); d->consumer = cp; cp->private = d; } /* * If this was just a "referenced" drive, we're almost finished, but * insert this drive not on the head of the drives list, as * gv_drive_is_newer() expects a "real" drive from LIST_FIRST(). 
*/ if (d->flags & GV_DRIVE_REFERENCED) { snprintf(d->device, sizeof(d->device), "???"); d2 = LIST_FIRST(&sc->drives); if (d2 == NULL) LIST_INSERT_HEAD(&sc->drives, d, drive); else LIST_INSERT_AFTER(d2, d, drive); return (0); } /* * Update access counts of the new drive to those of an already * existing drive. */ LIST_FOREACH(d2, &sc->drives, drive) { if ((d == d2) || (d2->consumer == NULL)) continue; cp2 = d2->consumer; g_topology_lock(); if ((cp2->acr || cp2->acw || cp2->ace) && (g_access(cp, cp2->acr, cp2->acw, cp2->ace) != 0)) { g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); G_VINUM_DEBUG(0, "create drive '%s': unable to update " "access counts", d->name); if (d->hdr != NULL) g_free(d->hdr); g_free(d); return (GV_ERR_CREATE); } g_topology_unlock(); break; } d->size = pp->mediasize - GV_DATA_START; d->avail = d->size; d->vinumconf = sc; LIST_INIT(&d->subdisks); LIST_INIT(&d->freelist); /* The header might have been set during taste. */ if (d->hdr == NULL) { hdr = g_malloc(sizeof(*hdr), M_WAITOK | M_ZERO); hdr->magic = GV_MAGIC; hdr->config_length = GV_CFG_LEN; getcredhostname(NULL, hdr->label.sysname, GV_HOSTNAME_LEN); strlcpy(hdr->label.name, d->name, sizeof(hdr->label.name)); microtime(&hdr->label.date_of_birth); d->hdr = hdr; } /* We also need a freelist entry. */ fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); fl->offset = GV_DATA_START; fl->size = d->avail; LIST_INSERT_HEAD(&d->freelist, fl, freelist); d->freelist_entries = 1; if (gv_find_drive(sc, d->name) == NULL) LIST_INSERT_HEAD(&sc->drives, d, drive); gv_set_drive_state(d, GV_DRIVE_UP, 0); return (0); } int gv_create_volume(struct gv_softc *sc, struct gv_volume *v) { KASSERT(v != NULL, ("gv_create_volume: NULL v")); v->vinumconf = sc; v->flags |= GV_VOL_NEWBORN; LIST_INIT(&v->plexes); LIST_INSERT_HEAD(&sc->volumes, v, volume); v->wqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(v->wqueue); return (0); } int gv_create_plex(struct gv_softc *sc, struct gv_plex *p) { struct gv_volume *v; KASSERT(p != NULL, ("gv_create_plex: NULL p")); /* Find the volume this plex should be attached to. */ v = gv_find_vol(sc, p->volume); if (v == NULL) { G_VINUM_DEBUG(0, "create plex '%s': volume '%s' not found", p->name, p->volume); g_free(p); return (GV_ERR_CREATE); } if (!(v->flags & GV_VOL_NEWBORN)) p->flags |= GV_PLEX_ADDED; p->vol_sc = v; v->plexcount++; p->vinumconf = sc; p->synced = 0; p->flags |= GV_PLEX_NEWBORN; LIST_INSERT_HEAD(&v->plexes, p, in_volume); LIST_INIT(&p->subdisks); TAILQ_INIT(&p->packets); LIST_INSERT_HEAD(&sc->plexes, p, plex); p->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(p->bqueue); p->wqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(p->wqueue); p->rqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(p->rqueue); return (0); } int gv_create_sd(struct gv_softc *sc, struct gv_sd *s) { struct gv_plex *p; struct gv_drive *d; KASSERT(s != NULL, ("gv_create_sd: NULL s")); /* Find the drive where this subdisk should be put on. */ d = gv_find_drive(sc, s->drive); if (d == NULL) { /* * It's possible that the subdisk references a drive that * doesn't exist yet (during the taste process), so create a * practically empty "referenced" drive. 
*/ if (s->flags & GV_SD_TASTED) { d = g_malloc(sizeof(struct gv_drive), M_WAITOK | M_ZERO); d->flags |= GV_DRIVE_REFERENCED; strlcpy(d->name, s->drive, sizeof(d->name)); gv_create_drive(sc, d); } else { G_VINUM_DEBUG(0, "create sd '%s': drive '%s' not found", s->name, s->drive); g_free(s); return (GV_ERR_CREATE); } } /* Find the plex where this subdisk belongs to. */ p = gv_find_plex(sc, s->plex); if (p == NULL) { G_VINUM_DEBUG(0, "create sd '%s': plex '%s' not found", s->name, s->plex); g_free(s); return (GV_ERR_CREATE); } /* * First we give the subdisk to the drive, to handle autosized * values ... */ if (gv_sd_to_drive(s, d) != 0) { g_free(s); return (GV_ERR_CREATE); } /* * Then, we give the subdisk to the plex; we check if the * given values are correct and maybe adjust them. */ if (gv_sd_to_plex(s, p) != 0) { G_VINUM_DEBUG(0, "unable to give sd '%s' to plex '%s'", s->name, p->name); if (s->drive_sc && !(s->drive_sc->flags & GV_DRIVE_REFERENCED)) LIST_REMOVE(s, from_drive); gv_free_sd(s); g_free(s); /* * If this subdisk can't be created, we won't create * the attached plex either, if it is also a new one. */ if (!(p->flags & GV_PLEX_NEWBORN)) return (GV_ERR_CREATE); gv_rm_plex(sc, p); return (GV_ERR_CREATE); } s->flags |= GV_SD_NEWBORN; s->vinumconf = sc; LIST_INSERT_HEAD(&sc->subdisks, s, sd); return (0); } /* * Create a concatenated volume from specified drives or drivegroups. */ void gv_concat(struct g_geom *gp, struct gctl_req *req) { struct gv_drive *d; struct gv_sd *s; struct gv_volume *v; struct gv_plex *p; struct gv_softc *sc; char *drive, buf[30], *vol; int *drives, dcount; sc = gp->softc; dcount = 0; vol = gctl_get_param(req, "name", NULL); if (vol == NULL) { gctl_error(req, "volume name not given"); return; } drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (drives == NULL) { gctl_error(req, "drive names not given"); return; } /* First we create the volume. */ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); strlcpy(v->name, vol, sizeof(v->name)); v->state = GV_VOL_UP; gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); /* Then we create the plex. */ p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, v->plexcount); strlcpy(p->volume, v->name, sizeof(p->volume)); p->org = GV_PLEX_CONCAT; p->stripesize = 0; gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); /* Drives are first (right now) priority */ for (dcount = 0; dcount < *drives; dcount++) { snprintf(buf, sizeof(buf), "drive%d", dcount); drive = gctl_get_param(req, buf, NULL); d = gv_find_drive(sc, drive); if (d == NULL) { gctl_error(req, "No such drive '%s'", drive); continue; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); snprintf(s->name, sizeof(s->name), "%s.s%d", p->name, dcount); strlcpy(s->plex, p->name, sizeof(s->plex)); strlcpy(s->drive, drive, sizeof(s->drive)); s->plex_offset = -1; s->drive_offset = -1; s->size = -1; gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } /* * Create a mirrored volume from specified drives or drivegroups. 
*/ void gv_mirror(struct g_geom *gp, struct gctl_req *req) { struct gv_drive *d; struct gv_sd *s; struct gv_volume *v; struct gv_plex *p; struct gv_softc *sc; char *drive, buf[30], *vol; int *drives, *flags, dcount, pcount, scount; sc = gp->softc; dcount = 0; scount = 0; pcount = 0; vol = gctl_get_param(req, "name", NULL); if (vol == NULL) { gctl_error(req, "volume name not given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (drives == NULL) { gctl_error(req, "drive names not given"); return; } /* We must have an even number of drives. */ if (*drives % 2 != 0) { gctl_error(req, "mirror organization must have an even number " "of drives"); return; } if (*flags & GV_FLAG_S && *drives < 4) { gctl_error(req, "must have at least 4 drives for striped plex"); return; } /* First we create the volume. */ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); strlcpy(v->name, vol, sizeof(v->name)); v->state = GV_VOL_UP; gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); /* Then we create the plexes. */ for (pcount = 0; pcount < 2; pcount++) { p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, pcount); strlcpy(p->volume, v->name, sizeof(p->volume)); if (*flags & GV_FLAG_S) { p->org = GV_PLEX_STRIPED; p->stripesize = DEFAULT_STRIPESIZE; } else { p->org = GV_PLEX_CONCAT; p->stripesize = -1; } gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); /* * We just gives each even drive to plex one, and each odd to * plex two. */ scount = 0; for (dcount = pcount; dcount < *drives; dcount += 2) { snprintf(buf, sizeof(buf), "drive%d", dcount); drive = gctl_get_param(req, buf, NULL); d = gv_find_drive(sc, drive); if (d == NULL) { gctl_error(req, "No such drive '%s', aborting", drive); scount++; break; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); snprintf(s->name, sizeof(s->name), "%s.s%d", p->name, scount); strlcpy(s->plex, p->name, sizeof(s->plex)); strlcpy(s->drive, drive, sizeof(s->drive)); s->plex_offset = -1; s->drive_offset = -1; s->size = -1; gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); scount++; } } gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } void gv_raid5(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_drive *d; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; int *drives, *flags, dcount; char *vol, *drive, buf[30]; off_t *stripesize; sc = gp->softc; vol = gctl_get_param(req, "name", NULL); if (vol == NULL) { gctl_error(req, "volume name not given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "no stripesize given"); return; } if (drives == NULL) { gctl_error(req, "drive names not given"); return; } /* We must have at least three drives. */ if (*drives < 3) { gctl_error(req, "must have at least three drives for this " "plex organisation"); return; } /* First we create the volume. */ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); strlcpy(v->name, vol, sizeof(v->name)); v->state = GV_VOL_UP; gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); /* Then we create the plex. 
*/ p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, v->plexcount); strlcpy(p->volume, v->name, sizeof(p->volume)); p->org = GV_PLEX_RAID5; p->stripesize = *stripesize; gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); /* Create subdisks on drives. */ for (dcount = 0; dcount < *drives; dcount++) { snprintf(buf, sizeof(buf), "drive%d", dcount); drive = gctl_get_param(req, buf, NULL); d = gv_find_drive(sc, drive); if (d == NULL) { gctl_error(req, "No such drive '%s'", drive); continue; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); snprintf(s->name, sizeof(s->name), "%s.s%d", p->name, dcount); strlcpy(s->plex, p->name, sizeof(s->plex)); strlcpy(s->drive, drive, sizeof(s->drive)); s->plex_offset = -1; s->drive_offset = -1; s->size = -1; gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } /* * Create a striped volume from specified drives or drivegroups. */ void gv_stripe(struct g_geom *gp, struct gctl_req *req) { struct gv_drive *d; struct gv_sd *s; struct gv_volume *v; struct gv_plex *p; struct gv_softc *sc; char *drive, buf[30], *vol; int *drives, *flags, dcount, pcount; sc = gp->softc; dcount = 0; pcount = 0; vol = gctl_get_param(req, "name", NULL); if (vol == NULL) { gctl_error(req, "volume name not given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (drives == NULL) { gctl_error(req, "drive names not given"); return; } /* We must have at least two drives. */ if (*drives < 2) { gctl_error(req, "must have at least 2 drives"); return; } /* First we create the volume. */ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); strlcpy(v->name, vol, sizeof(v->name)); v->state = GV_VOL_UP; gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); /* Then we create the plex. */ p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, v->plexcount); strlcpy(p->volume, v->name, sizeof(p->volume)); p->org = GV_PLEX_STRIPED; p->stripesize = 262144; gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); /* Create subdisks on drives. */ for (dcount = 0; dcount < *drives; dcount++) { snprintf(buf, sizeof(buf), "drive%d", dcount); drive = gctl_get_param(req, buf, NULL); d = gv_find_drive(sc, drive); if (d == NULL) { gctl_error(req, "No such drive '%s'", drive); continue; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); snprintf(s->name, sizeof(s->name), "%s.s%d", p->name, dcount); strlcpy(s->plex, p->name, sizeof(s->plex)); strlcpy(s->drive, drive, sizeof(s->drive)); s->plex_offset = -1; s->drive_offset = -1; s->size = -1; gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } Index: head/sys/geom/vinum/geom_vinum_drive.c =================================================================== --- head/sys/geom/vinum/geom_vinum_drive.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_drive.c (revision 350694) @@ -1,354 +1,355 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2005, 2007 Lukas Ertl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include #include #include #define GV_LEGACY_I386 0 #define GV_LEGACY_AMD64 1 #define GV_LEGACY_SPARC64 2 #define GV_LEGACY_POWERPC 3 static int gv_legacy_header_type(uint8_t *, int); /* * Here are the "offset (size)" for the various struct gv_hdr fields, * for the legacy i386 (or 32-bit powerpc), legacy amd64 (or sparc64), and * current (cpu & endian agnostic) versions of the on-disk format of the vinum * header structure: * * i386 amd64 current field * -------- -------- -------- ----- * 0 ( 8) 0 ( 8) 0 ( 8) magic * 8 ( 4) 8 ( 8) 8 ( 8) config_length * 12 (32) 16 (32) 16 (32) label.sysname * 44 (32) 48 (32) 48 (32) label.name * 76 ( 4) 80 ( 8) 80 ( 8) label.date_of_birth.tv_sec * 80 ( 4) 88 ( 8) 88 ( 8) label.date_of_birth.tv_usec * 84 ( 4) 96 ( 8) 96 ( 8) label.last_update.tv_sec * 88 ( 4) 104 ( 8) 104 ( 8) label.last_update.tv_usec * 92 ( 8) 112 ( 8) 112 ( 8) label.drive_size * ======== ======== ======== * 100 120 120 total size * * NOTE: i386 and amd64 formats are stored as little-endian; the current * format uses big-endian (network order). */ /* Checks for legacy format depending on platform. */ static int gv_legacy_header_type(uint8_t *hdr, int bigendian) { uint32_t *i32; int arch_32, arch_64, i; /* Set arch according to endianness. */ if (bigendian) { arch_32 = GV_LEGACY_POWERPC; arch_64 = GV_LEGACY_SPARC64; } else { arch_32 = GV_LEGACY_I386; arch_64 = GV_LEGACY_AMD64; } /* if non-empty hostname overlaps 64-bit config_length */ i32 = (uint32_t *)(hdr + 12); if (*i32 != 0) return (arch_32); /* check for non-empty hostname */ if (hdr[16] != 0) return (arch_64); /* check bytes past 32-bit structure */ for (i = 100; i < 120; i++) if (hdr[i] != 0) return (arch_32); /* check for overlapping timestamp */ i32 = (uint32_t *)(hdr + 84); if (*i32 == 0) return (arch_64); return (arch_32); } /* * Read the header while taking magic number into account, and write it to * destination pointer. 
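gv_legacy_header_type() above distinguishes the 32-bit and 64-bit legacy on-disk layouts purely from which bytes of the raw header can be non-zero, following the offset table in the comment. A stand-alone restatement of that heuristic is sketched below; the enum and function names are invented, and the real function additionally maps the result to a per-platform constant depending on endianness.

/*
 * Classify a 120-byte raw legacy header as the 32-bit or 64-bit layout by
 * probing bytes that are only meaningful in one of the two formats.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

enum legacy_layout { LEGACY_32BIT, LEGACY_64BIT };

static enum legacy_layout
classify_legacy_header(const uint8_t *hdr)
{
	uint32_t v;
	int i;

	/* Non-empty hostname at offset 12 overlaps 64-bit config_length. */
	memcpy(&v, hdr + 12, sizeof(v));
	if (v != 0)
		return (LEGACY_32BIT);
	/* The 64-bit layout keeps the hostname at offset 16 instead. */
	if (hdr[16] != 0)
		return (LEGACY_64BIT);
	/* Probe the bytes past the 100-byte 32-bit structure. */
	for (i = 100; i < 120; i++)
		if (hdr[i] != 0)
			return (LEGACY_32BIT);
	/* Finally, check the overlapping timestamp word at offset 84. */
	memcpy(&v, hdr + 84, sizeof(v));
	return (v == 0 ? LEGACY_64BIT : LEGACY_32BIT);
}

int
main(void)
{
	uint8_t hdr[120];

	memset(hdr, 0, sizeof(hdr));
	hdr[16] = 'h';	/* hostname where only the 64-bit layout puts it */
	printf("layout: %s\n",
	    classify_legacy_header(hdr) == LEGACY_64BIT ? "64-bit" : "32-bit");
	return (0);
}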
*/ int gv_read_header(struct g_consumer *cp, struct gv_hdr *m_hdr) { struct g_provider *pp; uint64_t magic_machdep; uint8_t *d_hdr; int be, off; #define GV_GET32(endian) \ endian##32toh(*((uint32_t *)&d_hdr[off])); \ off += 4 #define GV_GET64(endian) \ endian##64toh(*((uint64_t *)&d_hdr[off])); \ off += 8 KASSERT(m_hdr != NULL, ("gv_read_header: null m_hdr")); KASSERT(cp != NULL, ("gv_read_header: null cp")); pp = cp->provider; KASSERT(pp != NULL, ("gv_read_header: null pp")); if ((GV_HDR_OFFSET % pp->sectorsize) != 0 || (GV_HDR_LEN % pp->sectorsize) != 0) return (ENODEV); d_hdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL); if (d_hdr == NULL) return (-1); off = 0; m_hdr->magic = GV_GET64(be); magic_machdep = *((uint64_t *)&d_hdr[0]); /* * The big endian machines will have a reverse of GV_OLD_MAGIC, so we * need to decide if we are running on a big endian machine as well as * checking the magic against the reverse of GV_OLD_MAGIC. */ be = (m_hdr->magic == magic_machdep); if (m_hdr->magic == GV_MAGIC) { m_hdr->config_length = GV_GET64(be); off = 16; bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); off += GV_HOSTNAME_LEN; bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); off += GV_MAXDRIVENAME; m_hdr->label.date_of_birth.tv_sec = GV_GET64(be); m_hdr->label.date_of_birth.tv_usec = GV_GET64(be); m_hdr->label.last_update.tv_sec = GV_GET64(be); m_hdr->label.last_update.tv_usec = GV_GET64(be); m_hdr->label.drive_size = GV_GET64(be); } else if (m_hdr->magic != GV_OLD_MAGIC && m_hdr->magic != le64toh(GV_OLD_MAGIC)) { /* Not a gvinum drive. */ g_free(d_hdr); return (-1); } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_SPARC64) { G_VINUM_DEBUG(1, "detected legacy sparc64 header"); m_hdr->magic = GV_MAGIC; /* Legacy sparc64 on-disk header */ m_hdr->config_length = GV_GET64(be); bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN); off += GV_HOSTNAME_LEN; bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME); off += GV_MAXDRIVENAME; m_hdr->label.date_of_birth.tv_sec = GV_GET64(be); m_hdr->label.date_of_birth.tv_usec = GV_GET64(be); m_hdr->label.last_update.tv_sec = GV_GET64(be); m_hdr->label.last_update.tv_usec = GV_GET64(be); m_hdr->label.drive_size = GV_GET64(be); } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_POWERPC) { G_VINUM_DEBUG(1, "detected legacy PowerPC header"); m_hdr->magic = GV_MAGIC; /* legacy 32-bit big endian on-disk header */ m_hdr->config_length = GV_GET32(be); bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); off += GV_HOSTNAME_LEN; bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); off += GV_MAXDRIVENAME; m_hdr->label.date_of_birth.tv_sec = GV_GET32(be); m_hdr->label.date_of_birth.tv_usec = GV_GET32(be); m_hdr->label.last_update.tv_sec = GV_GET32(be); m_hdr->label.last_update.tv_usec = GV_GET32(be); m_hdr->label.drive_size = GV_GET64(be); } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_I386) { G_VINUM_DEBUG(1, "detected legacy i386 header"); m_hdr->magic = GV_MAGIC; /* legacy i386 on-disk header */ m_hdr->config_length = GV_GET32(le); bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); off += GV_HOSTNAME_LEN; bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); off += GV_MAXDRIVENAME; m_hdr->label.date_of_birth.tv_sec = GV_GET32(le); m_hdr->label.date_of_birth.tv_usec = GV_GET32(le); m_hdr->label.last_update.tv_sec = GV_GET32(le); m_hdr->label.last_update.tv_usec = GV_GET32(le); m_hdr->label.drive_size = GV_GET64(le); } else { G_VINUM_DEBUG(1, "detected legacy amd64 header"); m_hdr->magic = GV_MAGIC; 
/* legacy amd64 on-disk header */ m_hdr->config_length = GV_GET64(le); bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN); off += GV_HOSTNAME_LEN; bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME); off += GV_MAXDRIVENAME; m_hdr->label.date_of_birth.tv_sec = GV_GET64(le); m_hdr->label.date_of_birth.tv_usec = GV_GET64(le); m_hdr->label.last_update.tv_sec = GV_GET64(le); m_hdr->label.last_update.tv_usec = GV_GET64(le); m_hdr->label.drive_size = GV_GET64(le); } g_free(d_hdr); return (0); } /* Write out the gvinum header. */ int gv_write_header(struct g_consumer *cp, struct gv_hdr *m_hdr) { uint8_t d_hdr[GV_HDR_LEN]; int off, ret; #define GV_SET64BE(field) \ do { \ *((uint64_t *)&d_hdr[off]) = htobe64(field); \ off += 8; \ } while (0) KASSERT(m_hdr != NULL, ("gv_write_header: null m_hdr")); off = 0; memset(d_hdr, 0, GV_HDR_LEN); GV_SET64BE(m_hdr->magic); GV_SET64BE(m_hdr->config_length); off = 16; bcopy(m_hdr->label.sysname, d_hdr + off, GV_HOSTNAME_LEN); off += GV_HOSTNAME_LEN; bcopy(m_hdr->label.name, d_hdr + off, GV_MAXDRIVENAME); off += GV_MAXDRIVENAME; GV_SET64BE(m_hdr->label.date_of_birth.tv_sec); GV_SET64BE(m_hdr->label.date_of_birth.tv_usec); GV_SET64BE(m_hdr->label.last_update.tv_sec); GV_SET64BE(m_hdr->label.last_update.tv_usec); GV_SET64BE(m_hdr->label.drive_size); ret = g_write_data(cp, GV_HDR_OFFSET, d_hdr, GV_HDR_LEN); return (ret); } /* Save the vinum configuration back to each involved disk. */ void gv_save_config(struct gv_softc *sc) { struct g_consumer *cp; struct gv_drive *d; struct gv_hdr *vhdr, *hdr; struct sbuf *sb; struct timeval last_update; int error; KASSERT(sc != NULL, ("gv_save_config: null sc")); vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); vhdr->magic = GV_MAGIC; vhdr->config_length = GV_CFG_LEN; microtime(&last_update); sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); gv_format_config(sc, sb, 1, NULL); sbuf_finish(sb); LIST_FOREACH(d, &sc->drives, drive) { /* * We can't save the config on a drive that isn't up, but * drives that were just created aren't officially up yet, so * we check a special flag. */ if (d->state != GV_DRIVE_UP) continue; cp = d->consumer; if (cp == NULL) { G_VINUM_DEBUG(0, "drive '%s' has no consumer!", d->name); continue; } hdr = d->hdr; if (hdr == NULL) { G_VINUM_DEBUG(0, "drive '%s' has no header", d->name); g_free(vhdr); continue; } bcopy(&last_update, &hdr->label.last_update, sizeof(struct timeval)); bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label)); g_topology_lock(); error = g_access(cp, 0, 1, 0); if (error) { G_VINUM_DEBUG(0, "g_access failed on " "drive %s, errno %d", d->name, error); g_topology_unlock(); continue; } g_topology_unlock(); error = gv_write_header(cp, vhdr); if (error) { G_VINUM_DEBUG(0, "writing vhdr failed on drive %s, " "errno %d", d->name, error); g_topology_lock(); g_access(cp, 0, -1, 0); g_topology_unlock(); continue; } /* First config copy. */ error = g_write_data(cp, GV_CFG_OFFSET, sbuf_data(sb), GV_CFG_LEN); if (error) { G_VINUM_DEBUG(0, "writing first config copy failed on " "drive %s, errno %d", d->name, error); g_topology_lock(); g_access(cp, 0, -1, 0); g_topology_unlock(); continue; } /* Second config copy. 
*/ error = g_write_data(cp, GV_CFG_OFFSET + GV_CFG_LEN, sbuf_data(sb), GV_CFG_LEN); if (error) G_VINUM_DEBUG(0, "writing second config copy failed on " "drive %s, errno %d", d->name, error); g_topology_lock(); g_access(cp, 0, -1, 0); g_topology_unlock(); } sbuf_delete(sb); g_free(vhdr); } Index: head/sys/geom/vinum/geom_vinum_events.c =================================================================== --- head/sys/geom/vinum/geom_vinum_events.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_events.c (revision 350694) @@ -1,262 +1,263 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007 Lukas Ertl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
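gv_save_config() above formats the configuration once and then, per drive, writes the header followed by two identical copies of that text at fixed offsets. A user-space sketch of that on-media layout follows; the offsets and sizes are invented for the example, and the real values come from the vinum constants (GV_HDR_OFFSET, GV_CFG_OFFSET, GV_CFG_LEN).

/*
 * Write a header block and two redundant, fixed-size copies of a textual
 * configuration to a file at fixed offsets.
 */
#include <stdio.h>
#include <string.h>

#define HDR_OFFSET	512L	/* invented; stands in for GV_HDR_OFFSET */
#define CFG_OFFSET	4096L	/* invented; stands in for GV_CFG_OFFSET */
#define CFG_LEN		2048	/* invented; stands in for GV_CFG_LEN */

static int
save_config(FILE *fp, const char *hdr, const char *cfg)
{
	char block[CFG_LEN];

	/* Header first. */
	if (fseek(fp, HDR_OFFSET, SEEK_SET) != 0 ||
	    fwrite(hdr, 1, strlen(hdr), fp) != strlen(hdr))
		return (-1);

	/* Pad the config to a fixed-size block, then write it twice. */
	memset(block, 0, sizeof(block));
	strncpy(block, cfg, sizeof(block) - 1);
	if (fseek(fp, CFG_OFFSET, SEEK_SET) != 0 ||
	    fwrite(block, 1, sizeof(block), fp) != sizeof(block))
		return (-1);
	if (fseek(fp, CFG_OFFSET + CFG_LEN, SEEK_SET) != 0 ||
	    fwrite(block, 1, sizeof(block), fp) != sizeof(block))
		return (-1);
	return (0);
}

int
main(void)
{
	FILE *fp = fopen("drive.img", "w+b");

	if (fp == NULL)
		return (1);
	if (save_config(fp, "vinum-header",
	    "volume myvol\nplex name myvol.p0\n") != 0)
		fprintf(stderr, "write failed\n");
	fclose(fp);
	return (0);
}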
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include +#include #include #include void gv_post_event(struct gv_softc *sc, int event, void *arg1, void *arg2, intmax_t arg3, intmax_t arg4) { struct gv_event *ev; ev = g_malloc(sizeof(*ev), M_WAITOK | M_ZERO); ev->type = event; ev->arg1 = arg1; ev->arg2 = arg2; ev->arg3 = arg3; ev->arg4 = arg4; mtx_lock(&sc->equeue_mtx); TAILQ_INSERT_TAIL(&sc->equeue, ev, events); wakeup(sc); mtx_unlock(&sc->equeue_mtx); } void gv_worker_exit(struct gv_softc *sc) { struct gv_event *ev; ev = g_malloc(sizeof(*ev), M_WAITOK | M_ZERO); ev->type = GV_EVENT_THREAD_EXIT; mtx_lock(&sc->equeue_mtx); TAILQ_INSERT_TAIL(&sc->equeue, ev, events); wakeup(sc); msleep(sc->worker, &sc->equeue_mtx, PDROP, "gv_wor", 0); } struct gv_event * gv_get_event(struct gv_softc *sc) { struct gv_event *ev; KASSERT(sc != NULL, ("NULL sc")); mtx_lock(&sc->equeue_mtx); ev = TAILQ_FIRST(&sc->equeue); mtx_unlock(&sc->equeue_mtx); return (ev); } void gv_remove_event(struct gv_softc *sc, struct gv_event *ev) { KASSERT(sc != NULL, ("NULL sc")); KASSERT(ev != NULL, ("NULL ev")); mtx_lock(&sc->equeue_mtx); TAILQ_REMOVE(&sc->equeue, ev, events); mtx_unlock(&sc->equeue_mtx); } void gv_drive_tasted(struct gv_softc *sc, struct g_provider *pp) { struct g_geom *gp; struct g_consumer *cp; struct gv_hdr *hdr; struct gv_drive *d; char *buf; int error; hdr = NULL; buf = NULL; G_VINUM_DEBUG(2, "tasted drive on '%s'", pp->name); if ((GV_CFG_OFFSET % pp->sectorsize) != 0 || (GV_CFG_LEN % pp->sectorsize) != 0) { G_VINUM_DEBUG(0, "provider %s has unsupported sectorsize.", pp->name); return; } gp = sc->geom; g_topology_lock(); cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); g_topology_unlock(); G_VINUM_DEBUG(0, "failed to attach to provider on taste event"); return; } if (g_access(cp, 1, 0, 0) != 0) { g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); G_VINUM_DEBUG(0, "failed to access consumer on taste event"); return; } g_topology_unlock(); hdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); /* Read header and on-disk configuration. */ error = gv_read_header(cp, hdr); if (error) { G_VINUM_DEBUG(0, "failed to read header during taste"); goto failed; } /* * Setup the drive before we parse the on-disk configuration, so that * we already know about the drive then. */ d = gv_find_drive(sc, hdr->label.name); if (d == NULL) { d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); strlcpy(d->name, hdr->label.name, sizeof(d->name)); strlcpy(d->device, pp->name, sizeof(d->device)); } else if (d->flags & GV_DRIVE_REFERENCED) { strlcpy(d->device, pp->name, sizeof(d->device)); d->flags &= ~GV_DRIVE_REFERENCED; } else { G_VINUM_DEBUG(2, "drive '%s' is already known", d->name); goto failed; } /* Add the consumer and header to the new drive. */ d->consumer = cp; d->hdr = hdr; gv_create_drive(sc, d); buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL); if (buf == NULL) { G_VINUM_DEBUG(0, "failed to read config during taste"); goto failed; } gv_parse_config(sc, buf, d); g_free(buf); g_topology_lock(); g_access(cp, -1, 0, 0); g_topology_unlock(); gv_setup_objects(sc); gv_set_drive_state(d, GV_DRIVE_UP, 0); return; failed: if (hdr != NULL) g_free(hdr); g_topology_lock(); g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); } /* * When losing a drive (e.g. 
hardware failure), we cut down the consumer * attached to the underlying device and bring the drive itself to a * "referenced" state so that normal tasting could bring it up cleanly if it * possibly arrives again. */ void gv_drive_lost(struct gv_softc *sc, struct gv_drive *d) { struct g_consumer *cp; struct gv_drive *d2; struct gv_sd *s, *s2; struct gv_freelist *fl, *fl2; gv_set_drive_state(d, GV_DRIVE_DOWN, GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); cp = d->consumer; if (cp != NULL) { if (cp->nstart != cp->nend) { G_VINUM_DEBUG(0, "dead drive '%s' has still active " "requests, unable to detach consumer", d->name); gv_post_event(sc, GV_EVENT_DRIVE_LOST, d, NULL, 0, 0); return; } g_topology_lock(); if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); } LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) { LIST_REMOVE(fl, freelist); g_free(fl); } d->consumer = NULL; g_free(d->hdr); d->hdr = NULL; d->flags |= GV_DRIVE_REFERENCED; snprintf(d->device, sizeof(d->device), "???"); d->size = 0; d->avail = 0; d->freelist_entries = 0; d->sdcount = 0; /* Put the subdisk in tasted mode, and remove from drive list. */ LIST_FOREACH_SAFE(s, &d->subdisks, from_drive, s2) { LIST_REMOVE(s, from_drive); s->flags |= GV_SD_TASTED; } /* * Don't forget that gv_is_newer wants a "real" drive at the beginning * of the list, so, just to be safe, we shuffle around. */ LIST_REMOVE(d, drive); d2 = LIST_FIRST(&sc->drives); if (d2 == NULL) LIST_INSERT_HEAD(&sc->drives, d, drive); else LIST_INSERT_AFTER(d2, d, drive); gv_save_config(sc); } Index: head/sys/geom/vinum/geom_vinum_init.c =================================================================== --- head/sys/geom/vinum/geom_vinum_init.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_init.c (revision 350694) @@ -1,390 +1,391 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #include static int gv_sync(struct gv_volume *); static int gv_rebuild_plex(struct gv_plex *); static int gv_init_plex(struct gv_plex *); static int gv_grow_plex(struct gv_plex *); static int gv_sync_plex(struct gv_plex *, struct gv_plex *); static struct gv_plex *gv_find_good_plex(struct gv_volume *); void gv_start_obj(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_volume *v; struct gv_plex *p; int *argc, *initsize; char *argv, buf[20]; int i, type; argc = gctl_get_paraml(req, "argc", sizeof(*argc)); initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize)); if (argc == NULL || *argc == 0) { gctl_error(req, "no arguments given"); return; } sc = gp->softc; for (i = 0; i < *argc; i++) { snprintf(buf, sizeof(buf), "argv%d", i); argv = gctl_get_param(req, buf, NULL); if (argv == NULL) continue; type = gv_object_type(sc, argv); switch (type) { case GV_TYPE_VOL: v = gv_find_vol(sc, argv); if (v != NULL) gv_post_event(sc, GV_EVENT_START_VOLUME, v, NULL, *initsize, 0); break; case GV_TYPE_PLEX: p = gv_find_plex(sc, argv); if (p != NULL) gv_post_event(sc, GV_EVENT_START_PLEX, p, NULL, *initsize, 0); break; case GV_TYPE_SD: case GV_TYPE_DRIVE: /* XXX Not implemented, but what is the use? */ gctl_error(req, "unable to start '%s' - not yet supported", argv); return; default: gctl_error(req, "unknown object '%s'", argv); return; } } } int gv_start_plex(struct gv_plex *p) { struct gv_volume *v; struct gv_plex *up; struct gv_sd *s; int error; KASSERT(p != NULL, ("gv_start_plex: NULL p")); error = 0; v = p->vol_sc; /* RAID5 plexes can either be init, rebuilt or grown. */ if (p->org == GV_PLEX_RAID5) { if (p->state > GV_PLEX_DEGRADED) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) { error = gv_grow_plex(p); return (error); } } } else if (p->state == GV_PLEX_DEGRADED) { error = gv_rebuild_plex(p); } else error = gv_init_plex(p); } else { /* We want to sync from the other plex if we're down. */ if (p->state == GV_PLEX_DOWN && v->plexcount > 1) { up = gv_find_good_plex(v); if (up == NULL) { G_VINUM_DEBUG(1, "unable to find a good plex"); return (ENXIO); } g_topology_lock(); error = gv_access(v->provider, 1, 1, 0); if (error) { g_topology_unlock(); G_VINUM_DEBUG(0, "sync from '%s' failed to " "access volume: %d", up->name, error); return (error); } g_topology_unlock(); error = gv_sync_plex(p, up); if (error) return (error); /* * In case we have a stripe that is up, check whether it can be * grown. */ } else if (p->org == GV_PLEX_STRIPED && p->state != GV_PLEX_DOWN) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) { error = gv_grow_plex(p); break; } } } } return (error); } int gv_start_vol(struct gv_volume *v) { struct gv_plex *p; int error; KASSERT(v != NULL, ("gv_start_vol: NULL v")); error = 0; if (v->plexcount == 0) return (ENXIO); else if (v->plexcount == 1) { p = LIST_FIRST(&v->plexes); KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name)); error = gv_start_plex(p); } else error = gv_sync(v); return (error); } /* Sync a plex p from the plex up. 
*/ static int gv_sync_plex(struct gv_plex *p, struct gv_plex *up) { int error; KASSERT(p != NULL, ("%s: NULL p", __func__)); KASSERT(up != NULL, ("%s: NULL up", __func__)); if ((p == up) || (p->state == GV_PLEX_UP)) return (0); if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) { return (EINPROGRESS); } p->synced = 0; p->flags |= GV_PLEX_SYNCING; G_VINUM_DEBUG(1, "starting sync of plex %s", p->name); error = gv_sync_request(up, p, p->synced, MIN(GV_DFLT_SYNCSIZE, up->size - p->synced), BIO_READ, NULL); if (error) { G_VINUM_DEBUG(0, "error syncing plex %s", p->name); return (error); } return (0); } /* Return a good plex from volume v. */ static struct gv_plex * gv_find_good_plex(struct gv_volume *v) { struct gv_plex *up; /* Find the plex that's up. */ up = NULL; LIST_FOREACH(up, &v->plexes, in_volume) { if (up->state == GV_PLEX_UP) break; } /* Didn't find a good plex. */ return (up); } static int gv_sync(struct gv_volume *v) { struct gv_softc *sc; struct gv_plex *p, *up; int error; KASSERT(v != NULL, ("gv_sync: NULL v")); sc = v->vinumconf; KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name)); up = gv_find_good_plex(v); if (up == NULL) return (ENXIO); g_topology_lock(); error = gv_access(v->provider, 1, 1, 0); if (error) { g_topology_unlock(); G_VINUM_DEBUG(0, "sync from '%s' failed to access volume: %d", up->name, error); return (error); } g_topology_unlock(); /* Go through the good plex, and issue BIO's to all other plexes. */ LIST_FOREACH(p, &v->plexes, in_volume) { error = gv_sync_plex(p, up); if (error) break; } return (0); } static int gv_rebuild_plex(struct gv_plex *p) { struct gv_drive *d; struct gv_sd *s; int error; if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) return (EINPROGRESS); /* * Make sure that all subdisks have consumers. We won't allow a rebuild * unless every subdisk have one. */ LIST_FOREACH(s, &p->subdisks, in_plex) { d = s->drive_sc; if (d == NULL || (d->flags & GV_DRIVE_REFERENCED)) { G_VINUM_DEBUG(0, "unable to rebuild %s, subdisk(s) have" " no drives", p->name); return (ENXIO); } } p->flags |= GV_PLEX_REBUILDING; p->synced = 0; g_topology_assert_not(); g_topology_lock(); error = gv_access(p->vol_sc->provider, 1, 1, 0); if (error) { G_VINUM_DEBUG(0, "unable to access provider"); return (0); } g_topology_unlock(); gv_parity_request(p, GV_BIO_REBUILD, 0); return (0); } static int gv_grow_plex(struct gv_plex *p) { struct gv_volume *v; struct gv_sd *s; off_t origsize, origlength; int error, sdcount; KASSERT(p != NULL, ("gv_grow_plex: NULL p")); v = p->vol_sc; KASSERT(v != NULL, ("gv_grow_plex: NULL v")); if (p->flags & GV_PLEX_GROWING || p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING) return (EINPROGRESS); g_topology_lock(); error = gv_access(v->provider, 1, 1, 0); g_topology_unlock(); if (error) { G_VINUM_DEBUG(0, "unable to access provider"); return (error); } /* XXX: This routine with finding origsize is used two other places as * well, so we should create a function for it. 
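The XXX comment just above notes that this "original size" computation is duplicated elsewhere and could be factored out. A sketch of what such a helper might look like follows, with a hypothetical name and reduced structures; it mirrors the (sdcount - 1) arithmetic used below in gv_grow_plex(), counting only the subdisks that are not part of the grow.

/*
 * Hypothetical helper deriving the pre-grow data size and stripe length of a
 * plex from its non-growing subdisks, as gv_grow_plex() does inline.
 */
#include <stdio.h>

struct sd {
	long long	size;
	int		growing;
};

static void
orig_geometry(const struct sd *sds, int sdcount, long long stripesize,
    long long *origsize, long long *origlength)
{
	long long sdsize = sds[0].size;
	int i, oldcount = 0;

	for (i = 0; i < sdcount; i++)
		if (!sds[i].growing)
			oldcount++;
	/* Mirrors the (sdcount - 1) * size arithmetic in gv_grow_plex(). */
	*origsize = (oldcount - 1) * sdsize;
	*origlength = (oldcount - 1) * stripesize;
}

int
main(void)
{
	struct sd sds[4] = {
		{ 1 << 30, 0 }, { 1 << 30, 0 }, { 1 << 30, 0 }, { 1 << 30, 1 }
	};
	long long origsize, origlength;

	orig_geometry(sds, 4, 262144, &origsize, &origlength);
	printf("origsize=%lld origlength=%lld\n", origsize, origlength);
	return (0);
}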
*/ sdcount = p->sdcount; LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) sdcount--; } s = LIST_FIRST(&p->subdisks); if (s == NULL) { G_VINUM_DEBUG(0, "error growing plex without subdisks"); return (GV_ERR_NOTFOUND); } p->flags |= GV_PLEX_GROWING; origsize = (sdcount - 1) * s->size; origlength = (sdcount - 1) * p->stripesize; p->synced = 0; G_VINUM_DEBUG(1, "starting growing of plex %s", p->name); gv_grow_request(p, 0, MIN(origlength, origsize), BIO_READ, NULL); return (0); } static int gv_init_plex(struct gv_plex *p) { struct gv_drive *d; struct gv_sd *s; int error; off_t start; caddr_t data; KASSERT(p != NULL, ("gv_init_plex: NULL p")); LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->state == GV_SD_INITIALIZING) return (EINPROGRESS); gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE); s->init_size = GV_DFLT_SYNCSIZE; start = s->drive_offset + s->initialized; d = s->drive_sc; if (d == NULL) { G_VINUM_DEBUG(0, "subdisk %s has no drive yet", s->name); break; } /* * Take the lock here since we need to avoid a race in * gv_init_request if the BIO is completed before the lock is * released. */ g_topology_lock(); error = g_access(d->consumer, 0, 1, 0); g_topology_unlock(); if (error) { G_VINUM_DEBUG(0, "error accessing consumer when " "initializing %s", s->name); break; } data = g_malloc(s->init_size, M_WAITOK | M_ZERO); gv_init_request(s, start, data, s->init_size); } return (0); } Index: head/sys/geom/vinum/geom_vinum_move.c =================================================================== --- head/sys/geom/vinum/geom_vinum_move.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_move.c (revision 350694) @@ -1,190 +1,191 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Chris Jones * All rights reserved. * * This software was developed for the FreeBSD Project by Chris Jones * thanks to the support of Google's Summer of Code program and * mentoring by Lukas Ertl. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include void gv_move(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_sd *s; struct gv_drive *d; char buf[20], *destination, *object; int *argc, *flags, i, type; sc = gp->softc; argc = gctl_get_paraml(req, "argc", sizeof(*argc)); if (argc == NULL) { gctl_error(req, "no arguments given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "no flags given"); return; } destination = gctl_get_param(req, "destination", NULL); if (destination == NULL) { gctl_error(req, "no destination given"); return; } if (gv_object_type(sc, destination) != GV_TYPE_DRIVE) { gctl_error(req, "destination '%s' is not a drive", destination); return; } d = gv_find_drive(sc, destination); /* * We start with 1 here, because argv[0] on the command line is the * destination drive. */ for (i = 1; i < *argc; i++) { snprintf(buf, sizeof(buf), "argv%d", i); object = gctl_get_param(req, buf, NULL); if (object == NULL) continue; type = gv_object_type(sc, object); if (type != GV_TYPE_SD) { gctl_error(req, "you can only move subdisks; " "'%s' is not a subdisk", object); return; } s = gv_find_sd(sc, object); if (s == NULL) { gctl_error(req, "unknown subdisk '%s'", object); return; } gv_post_event(sc, GV_EVENT_MOVE_SD, s, d, *flags, 0); } } /* Move a subdisk. */ int gv_move_sd(struct gv_softc *sc, struct gv_sd *cursd, struct gv_drive *destination, int flags) { struct gv_drive *d; struct gv_sd *newsd, *s, *s2; struct gv_plex *p; int err; g_topology_assert(); KASSERT(cursd != NULL, ("gv_move_sd: NULL cursd")); KASSERT(destination != NULL, ("gv_move_sd: NULL destination")); d = cursd->drive_sc; if ((gv_consumer_is_open(d->consumer) || gv_consumer_is_open(destination->consumer)) && !(flags & GV_FLAG_F)) { G_VINUM_DEBUG(0, "consumers on current and destination drive " " still open"); return (GV_ERR_ISBUSY); } if (!(flags & GV_FLAG_F)) { G_VINUM_DEBUG(1, "-f flag not passed; move would be " "destructive"); return (GV_ERR_INVFLAG); } if (destination == cursd->drive_sc) { G_VINUM_DEBUG(1, "subdisk '%s' already on drive '%s'", cursd->name, destination->name); return (GV_ERR_ISATTACHED); } /* XXX: Does it have to be part of a plex? */ p = gv_find_plex(sc, cursd->plex); if (p == NULL) { G_VINUM_DEBUG(0, "subdisk '%s' is not part of a plex", cursd->name); return (GV_ERR_NOTFOUND); } /* Stale the old subdisk. */ err = gv_set_sd_state(cursd, GV_SD_STALE, GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); if (err) { G_VINUM_DEBUG(0, "unable to set the subdisk '%s' to state " "'stale'", cursd->name); return (err); } /* * Create new subdisk. Ideally, we'd use gv_new_sd, but that requires * us to create a string for it to parse, which is silly. * TODO: maybe refactor gv_new_sd such that this is no longer the case. */ newsd = g_malloc(sizeof(struct gv_sd), M_WAITOK | M_ZERO); newsd->plex_offset = cursd->plex_offset; newsd->size = cursd->size; newsd->drive_offset = -1; strlcpy(newsd->name, cursd->name, sizeof(newsd->name)); strlcpy(newsd->drive, destination->name, sizeof(newsd->drive)); strlcpy(newsd->plex, cursd->plex, sizeof(newsd->plex)); newsd->state = GV_SD_STALE; newsd->vinumconf = cursd->vinumconf; err = gv_sd_to_drive(newsd, destination); if (err) { /* XXX not enough free space? */ g_free(newsd); return (err); } /* Replace the old sd by the new one. 
*/ LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) { if (s == cursd) { gv_rm_sd(sc, s); } } gv_sd_to_plex(newsd, p); LIST_INSERT_HEAD(&sc->subdisks, newsd, sd); /* Update volume size of plex. */ if (p->vol_sc != NULL) gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc)); gv_save_config(p->vinumconf); return (0); } Index: head/sys/geom/vinum/geom_vinum_plex.c =================================================================== --- head/sys/geom/vinum/geom_vinum_plex.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_plex.c (revision 350694) @@ -1,1050 +1,1051 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include #include #include #include static int gv_check_parity(struct gv_plex *, struct bio *, struct gv_raid5_packet *); static int gv_normal_parity(struct gv_plex *, struct bio *, struct gv_raid5_packet *); static void gv_plex_flush(struct gv_plex *); static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *, int *, int); static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t, off_t, caddr_t); static void gv_post_bio(struct gv_softc *, struct bio *); void gv_plex_start(struct gv_plex *p, struct bio *bp) { struct bio *cbp; struct gv_sd *s; struct gv_raid5_packet *wp; caddr_t addr; off_t bcount, boff, len; bcount = bp->bio_length; addr = bp->bio_data; boff = bp->bio_offset; /* Walk over the whole length of the request, we might split it up. */ while (bcount > 0) { wp = NULL; /* * RAID5 plexes need special treatment, as a single request * might involve several read/write sub-requests. */ if (p->org == GV_PLEX_RAID5) { wp = gv_raid5_start(p, bp, addr, boff, bcount); if (wp == NULL) return; len = wp->length; if (TAILQ_EMPTY(&wp->bits)) g_free(wp); else if (wp->lockbase != -1) TAILQ_INSERT_TAIL(&p->packets, wp, list); /* * Requests to concatenated and striped plexes go straight * through. */ } else { len = gv_plex_normal_request(p, bp, boff, bcount, addr); } if (len < 0) return; bcount -= len; addr += len; boff += len; } /* * Fire off all sub-requests. 
We get the correct consumer (== drive) * to send each request to via the subdisk that was stored in * cbp->bio_caller1. */ cbp = bioq_takefirst(p->bqueue); while (cbp != NULL) { /* * RAID5 sub-requests need to come in correct order, otherwise * we trip over the parity, as it might be overwritten by * another sub-request. We abuse cbp->bio_caller2 to mark * potential overlap situations. */ if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) { /* Park the bio on the waiting queue. */ cbp->bio_pflags |= GV_BIO_ONHOLD; bioq_disksort(p->wqueue, cbp); } else { s = cbp->bio_caller1; g_io_request(cbp, s->drive_sc->consumer); } cbp = bioq_takefirst(p->bqueue); } } static int gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off, off_t *real_len, int *sdno, int growing) { struct gv_sd *s; int i, sdcount; off_t len_left, stripeend, stripeno, stripestart; switch (p->org) { case GV_PLEX_CONCAT: /* * Find the subdisk where this request starts. The subdisks in * this list must be ordered by plex_offset. */ i = 0; LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->plex_offset <= boff && s->plex_offset + s->size > boff) { *sdno = i; break; } i++; } if (s == NULL || s->drive_sc == NULL) return (GV_ERR_NOTFOUND); /* Calculate corresponding offsets on disk. */ *real_off = boff - s->plex_offset; len_left = s->size - (*real_off); KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); *real_len = (bcount > len_left) ? len_left : bcount; break; case GV_PLEX_STRIPED: /* The number of the stripe where the request starts. */ stripeno = boff / p->stripesize; KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0")); /* Take growing subdisks into account when calculating. */ sdcount = gv_sdcount(p, (boff >= p->synced)); if (!(boff + bcount <= p->synced) && (p->flags & GV_PLEX_GROWING) && !growing) return (GV_ERR_ISBUSY); *sdno = stripeno % sdcount; KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0")); stripestart = (stripeno / sdcount) * p->stripesize; KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0")); stripeend = stripestart + p->stripesize; *real_off = boff - (stripeno * p->stripesize) + stripestart; len_left = stripeend - *real_off; KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); *real_len = (bcount <= len_left) ? bcount : len_left; break; default: return (GV_ERR_PLEXORG); } return (0); } /* * Prepare a normal plex request. */ static int gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff, off_t bcount, caddr_t addr) { struct gv_sd *s; struct bio *cbp; off_t real_len, real_off; int i, err, sdno; s = NULL; sdno = -1; real_len = real_off = 0; err = ENXIO; if (p == NULL || LIST_EMPTY(&p->subdisks)) goto bad; err = gv_plex_offset(p, boff, bcount, &real_off, &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW)); /* If the request was blocked, put it into wait. */ if (err == GV_ERR_ISBUSY) { bioq_disksort(p->rqueue, bp); return (-1); /* "Fail", and delay request. */ } if (err) { err = ENXIO; goto bad; } err = ENXIO; /* Find the right subdisk. */ i = 0; LIST_FOREACH(s, &p->subdisks, in_plex) { if (i == sdno) break; i++; } /* Subdisk not found. */ if (s == NULL || s->drive_sc == NULL) goto bad; /* Now check if we can handle the request on this subdisk. */ switch (s->state) { case GV_SD_UP: /* If the subdisk is up, just continue. 
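/*
 * A minimal sketch, not part of this change: the striped-plex mapping that
 * gv_plex_offset() above performs for GV_PLEX_STRIPED, in isolation.  It
 * assumes equal-sized subdisks and ignores the growing-subdisk special case.
 */
#include <stdint.h>

struct stripe_loc {
	int	sdno;		/* subdisk the first byte lands on */
	int64_t	sd_off;		/* offset within that subdisk */
	int64_t	max_len;	/* bytes available before the stripe boundary */
};

static struct stripe_loc
stripe_map(int64_t boff, int64_t bcount, int64_t stripesize, int sdcount)
{
	struct stripe_loc loc;
	int64_t stripeno, stripestart, stripeend;

	stripeno = boff / stripesize;		/* stripe number within the plex */
	loc.sdno = (int)(stripeno % sdcount);	/* stripes rotate across subdisks */
	stripestart = (stripeno / sdcount) * stripesize; /* stripe start on that subdisk */
	stripeend = stripestart + stripesize;
	loc.sd_off = boff - stripeno * stripesize + stripestart;
	loc.max_len = stripeend - loc.sd_off;
	if (bcount < loc.max_len)		/* clamp to the request, like MIN() */
		loc.max_len = bcount;
	return (loc);
}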
*/ break; case GV_SD_DOWN: if (bp->bio_pflags & GV_BIO_INTERNAL) G_VINUM_DEBUG(0, "subdisk must be in the stale state in" " order to perform administrative requests"); goto bad; case GV_SD_STALE: if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) { G_VINUM_DEBUG(0, "subdisk stale, unable to perform " "regular requests"); goto bad; } G_VINUM_DEBUG(1, "sd %s is initializing", s->name); gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE); break; case GV_SD_INITIALIZING: if (bp->bio_cmd == BIO_READ) goto bad; break; default: /* All other subdisk states mean it's not accessible. */ goto bad; } /* Clone the bio and adjust the offsets and sizes. */ cbp = g_clone_bio(bp); if (cbp == NULL) { err = ENOMEM; goto bad; } cbp->bio_offset = real_off + s->drive_offset; cbp->bio_length = real_len; cbp->bio_data = addr; cbp->bio_done = gv_done; cbp->bio_caller1 = s; /* Store the sub-requests now and let others issue them. */ bioq_insert_tail(p->bqueue, cbp); return (real_len); bad: G_VINUM_LOGREQ(0, bp, "plex request failed."); /* Building the sub-request failed. If internal BIO, do not deliver. */ if (bp->bio_pflags & GV_BIO_INTERNAL) { if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); g_destroy_bio(bp); p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING | GV_PLEX_GROWING); return (-1); } g_io_deliver(bp, err); return (-1); } /* * Handle a completed request to a striped or concatenated plex. */ void gv_plex_normal_done(struct gv_plex *p, struct bio *bp) { struct bio *pbp; pbp = bp->bio_parent; if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { /* Just set it to length since multiple plexes will * screw things up. */ pbp->bio_completed = pbp->bio_length; if (pbp->bio_pflags & GV_BIO_SYNCREQ) gv_sync_complete(p, pbp); else if (pbp->bio_pflags & GV_BIO_GROW) gv_grow_complete(p, pbp); else g_io_deliver(pbp, pbp->bio_error); } } /* * Handle a completed request to a RAID-5 plex. */ void gv_plex_raid5_done(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; struct bio *cbp, *pbp; struct gv_bioq *bq, *bq2; struct gv_raid5_packet *wp; off_t completed; int i; completed = 0; sc = p->vinumconf; wp = bp->bio_caller2; switch (bp->bio_parent->bio_cmd) { case BIO_READ: if (wp == NULL) { completed = bp->bio_completed; break; } TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { if (bq->bp != bp) continue; TAILQ_REMOVE(&wp->bits, bq, queue); g_free(bq); for (i = 0; i < wp->length; i++) wp->data[i] ^= bp->bio_data[i]; break; } if (TAILQ_EMPTY(&wp->bits)) { completed = wp->length; if (wp->lockbase != -1) { TAILQ_REMOVE(&p->packets, wp, list); /* Bring the waiting bios back into the game. */ pbp = bioq_takefirst(p->wqueue); while (pbp != NULL) { gv_post_bio(sc, pbp); pbp = bioq_takefirst(p->wqueue); } } g_free(wp); } break; case BIO_WRITE: /* XXX can this ever happen? */ if (wp == NULL) { completed = bp->bio_completed; break; } /* Check if we need to handle parity data. */ TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { if (bq->bp != bp) continue; TAILQ_REMOVE(&wp->bits, bq, queue); g_free(bq); cbp = wp->parity; if (cbp != NULL) { for (i = 0; i < wp->length; i++) cbp->bio_data[i] ^= bp->bio_data[i]; } break; } /* Handle parity data. */ if (TAILQ_EMPTY(&wp->bits)) { if (bp->bio_parent->bio_pflags & GV_BIO_CHECK) i = gv_check_parity(p, bp, wp); else i = gv_normal_parity(p, bp, wp); /* All of our sub-requests have finished. */ if (i) { completed = wp->length; TAILQ_REMOVE(&p->packets, wp, list); /* Bring the waiting bios back into the game. 
*/ pbp = bioq_takefirst(p->wqueue); while (pbp != NULL) { gv_post_bio(sc, pbp); pbp = bioq_takefirst(p->wqueue); } g_free(wp); } } break; } pbp = bp->bio_parent; if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += completed; /* When the original request is finished, we deliver it. */ pbp->bio_inbed++; if (pbp->bio_inbed == pbp->bio_children) { /* Hand it over for checking or delivery. */ if (pbp->bio_cmd == BIO_WRITE && (pbp->bio_pflags & GV_BIO_CHECK)) { gv_parity_complete(p, pbp); } else if (pbp->bio_cmd == BIO_WRITE && (pbp->bio_pflags & GV_BIO_REBUILD)) { gv_rebuild_complete(p, pbp); } else if (pbp->bio_pflags & GV_BIO_INIT) { gv_init_complete(p, pbp); } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) { gv_sync_complete(p, pbp); } else if (pbp->bio_pflags & GV_BIO_GROW) { gv_grow_complete(p, pbp); } else { g_io_deliver(pbp, pbp->bio_error); } } /* Clean up what we allocated. */ if (bp->bio_cflags & GV_BIO_MALLOC) g_free(bp->bio_data); g_destroy_bio(bp); } static int gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp) { struct bio *pbp; struct gv_sd *s; int err, finished, i; err = 0; finished = 1; if (wp->waiting != NULL) { pbp = wp->waiting; wp->waiting = NULL; s = pbp->bio_caller1; g_io_request(pbp, s->drive_sc->consumer); finished = 0; } else if (wp->parity != NULL) { pbp = wp->parity; wp->parity = NULL; /* Check if the parity is correct. */ for (i = 0; i < wp->length; i++) { if (bp->bio_data[i] != pbp->bio_data[i]) { err = 1; break; } } /* The parity is not correct... */ if (err) { bp->bio_parent->bio_error = EAGAIN; /* ... but we rebuild it. */ if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) { s = pbp->bio_caller1; g_io_request(pbp, s->drive_sc->consumer); finished = 0; } } /* * Clean up the BIO we would have used for rebuilding the * parity. */ if (finished) { bp->bio_parent->bio_inbed++; g_destroy_bio(pbp); } } return (finished); } static int gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp) { struct bio *cbp, *pbp; struct gv_sd *s; int finished, i; finished = 1; if (wp->waiting != NULL) { pbp = wp->waiting; wp->waiting = NULL; cbp = wp->parity; for (i = 0; i < wp->length; i++) cbp->bio_data[i] ^= pbp->bio_data[i]; s = pbp->bio_caller1; g_io_request(pbp, s->drive_sc->consumer); finished = 0; } else if (wp->parity != NULL) { cbp = wp->parity; wp->parity = NULL; s = cbp->bio_caller1; g_io_request(cbp, s->drive_sc->consumer); finished = 0; } return (finished); } /* Flush the queue with delayed requests. 
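/*
 * A minimal sketch, not part of this change: the XOR arithmetic behind
 * gv_normal_parity()/gv_check_parity() above.  Parity is the byte-wise XOR
 * of all data stripes, so a missing stripe is rebuilt by XOR-ing the
 * survivors with the parity, and a parity check is recompute-and-compare.
 */
#include <stddef.h>
#include <string.h>

/* XOR 'src' into 'dst' -- the core operation applied to bio_data buffers. */
static void
xor_into(unsigned char *dst, const unsigned char *src, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		dst[i] ^= src[i];
}

/* Rebuild one missing stripe from the surviving stripes plus the parity. */
static void
rebuild_stripe(unsigned char *out, const unsigned char *const *survivors,
    int nsurvivors, const unsigned char *parity, size_t len)
{
	int i;

	memcpy(out, parity, len);
	for (i = 0; i < nsurvivors; i++)
		xor_into(out, survivors[i], len);
}

/* Return 0 if the stored parity matches the XOR of all data stripes. */
static int
check_parity(const unsigned char *const *data, int ndata,
    const unsigned char *parity, size_t len)
{
	size_t i;
	int d;
	unsigned char x;

	for (i = 0; i < len; i++) {
		x = 0;
		for (d = 0; d < ndata; d++)
			x ^= data[d][i];
		if (x != parity[i])
			return (1);	/* mismatch; caller may rewrite parity */
	}
	return (0);
}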
*/ static void gv_plex_flush(struct gv_plex *p) { struct gv_softc *sc; struct bio *bp; sc = p->vinumconf; bp = bioq_takefirst(p->rqueue); while (bp != NULL) { gv_plex_start(p, bp); bp = bioq_takefirst(p->rqueue); } } static void gv_post_bio(struct gv_softc *sc, struct bio *bp) { KASSERT(sc != NULL, ("NULL sc")); KASSERT(bp != NULL, ("NULL bp")); mtx_lock(&sc->bqueue_mtx); bioq_disksort(sc->bqueue_down, bp); wakeup(sc); mtx_unlock(&sc->bqueue_mtx); } int gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset, off_t length, int type, caddr_t data) { struct gv_softc *sc; struct bio *bp; KASSERT(from != NULL, ("NULL from")); KASSERT(to != NULL, ("NULL to")); sc = from->vinumconf; KASSERT(sc != NULL, ("NULL sc")); bp = g_new_bio(); if (bp == NULL) { G_VINUM_DEBUG(0, "sync from '%s' failed at offset " " %jd; out of memory", from->name, offset); return (ENOMEM); } bp->bio_length = length; bp->bio_done = gv_done; bp->bio_pflags |= GV_BIO_SYNCREQ; bp->bio_offset = offset; bp->bio_caller1 = from; bp->bio_caller2 = to; bp->bio_cmd = type; if (data == NULL) data = g_malloc(length, M_WAITOK); bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */ bp->bio_data = data; /* Send down next. */ gv_post_bio(sc, bp); //gv_plex_start(from, bp); return (0); } /* * Handle a finished plex sync bio. */ int gv_sync_complete(struct gv_plex *to, struct bio *bp) { struct gv_plex *from, *p; struct gv_sd *s; struct gv_volume *v; struct gv_softc *sc; off_t offset; int err; g_topology_assert_not(); err = 0; KASSERT(to != NULL, ("NULL to")); KASSERT(bp != NULL, ("NULL bp")); from = bp->bio_caller2; KASSERT(from != NULL, ("NULL from")); v = to->vol_sc; KASSERT(v != NULL, ("NULL v")); sc = v->vinumconf; KASSERT(sc != NULL, ("NULL sc")); /* If it was a read, write it. */ if (bp->bio_cmd == BIO_READ) { err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length, BIO_WRITE, bp->bio_data); /* If it was a write, read the next one. */ } else if (bp->bio_cmd == BIO_WRITE) { if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); to->synced += bp->bio_length; /* If we're finished, clean up. */ if (bp->bio_offset + bp->bio_length >= from->size) { G_VINUM_DEBUG(1, "syncing of %s from %s completed", to->name, from->name); /* Update our state. */ LIST_FOREACH(s, &to->subdisks, in_plex) gv_set_sd_state(s, GV_SD_UP, 0); gv_update_plex_state(to); to->flags &= ~GV_PLEX_SYNCING; to->synced = 0; gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } else { offset = bp->bio_offset + bp->bio_length; err = gv_sync_request(from, to, offset, MIN(bp->bio_length, from->size - offset), BIO_READ, NULL); } } g_destroy_bio(bp); /* Clean up if there was an error. */ if (err) { to->flags &= ~GV_PLEX_SYNCING; G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err); } /* Check if all plexes are synced, and lower refcounts. */ g_topology_lock(); LIST_FOREACH(p, &v->plexes, in_volume) { if (p->flags & GV_PLEX_SYNCING) { g_topology_unlock(); return (-1); } } /* If we came here, all plexes are synced, and we're free. */ gv_access(v->provider, -1, -1, 0); g_topology_unlock(); G_VINUM_DEBUG(1, "plex sync completed"); gv_volume_flush(v); return (0); } /* * Create a new bio struct for the next grow request. 
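/*
 * A minimal sketch, not part of this change: the read-then-write chain that
 * gv_sync_request()/gv_sync_complete() above build asynchronously out of
 * BIO_READ/BIO_WRITE completions, written as a plain synchronous loop.
 * plex_read() and plex_write() are hypothetical stand-ins for issuing a
 * request to the source and target plex.
 */
#include <stdint.h>
#include <stdlib.h>

int plex_read(void *from, int64_t off, void *buf, int64_t len);      /* assumed */
int plex_write(void *to, int64_t off, const void *buf, int64_t len); /* assumed */

static int
sync_plex(void *from, void *to, int64_t size, int64_t chunk)
{
	unsigned char *buf;
	int64_t off, len;
	int err = 0;

	buf = malloc((size_t)chunk);
	if (buf == NULL)
		return (-1);
	for (off = 0; off < size && err == 0; off += len) {
		len = (size - off < chunk) ? size - off : chunk;	/* MIN() */
		err = plex_read(from, off, buf, len);	/* gv_sync_request(BIO_READ) */
		if (err == 0)				/* completion turns it around */
			err = plex_write(to, off, buf, len);
	}
	free(buf);
	return (err);
}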
*/ int gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type, caddr_t data) { struct gv_softc *sc; struct bio *bp; KASSERT(p != NULL, ("gv_grow_request: NULL p")); sc = p->vinumconf; KASSERT(sc != NULL, ("gv_grow_request: NULL sc")); bp = g_new_bio(); if (bp == NULL) { G_VINUM_DEBUG(0, "grow of %s failed creating bio: " "out of memory", p->name); return (ENOMEM); } bp->bio_cmd = type; bp->bio_done = gv_done; bp->bio_error = 0; bp->bio_caller1 = p; bp->bio_offset = offset; bp->bio_length = length; bp->bio_pflags |= GV_BIO_GROW; if (data == NULL) data = g_malloc(length, M_WAITOK); bp->bio_pflags |= GV_BIO_MALLOC; bp->bio_data = data; gv_post_bio(sc, bp); //gv_plex_start(p, bp); return (0); } /* * Finish handling of a bio to a growing plex. */ void gv_grow_complete(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; struct gv_sd *s; struct gv_volume *v; off_t origsize, offset; int sdcount, err; v = p->vol_sc; KASSERT(v != NULL, ("gv_grow_complete: NULL v")); sc = v->vinumconf; KASSERT(sc != NULL, ("gv_grow_complete: NULL sc")); err = 0; /* If it was a read, write it. */ if (bp->bio_cmd == BIO_READ) { p->synced += bp->bio_length; err = gv_grow_request(p, bp->bio_offset, bp->bio_length, BIO_WRITE, bp->bio_data); /* If it was a write, read next. */ } else if (bp->bio_cmd == BIO_WRITE) { if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); /* Find the real size of the plex. */ sdcount = gv_sdcount(p, 1); s = LIST_FIRST(&p->subdisks); KASSERT(s != NULL, ("NULL s")); origsize = (s->size * (sdcount - 1)); if (bp->bio_offset + bp->bio_length >= origsize) { G_VINUM_DEBUG(1, "growing of %s completed", p->name); p->flags &= ~GV_PLEX_GROWING; LIST_FOREACH(s, &p->subdisks, in_plex) { s->flags &= ~GV_SD_GROW; gv_set_sd_state(s, GV_SD_UP, 0); } p->size = gv_plex_size(p); gv_update_vol_size(v, gv_vol_size(v)); gv_set_plex_state(p, GV_PLEX_UP, 0); g_topology_lock(); gv_access(v->provider, -1, -1, 0); g_topology_unlock(); p->synced = 0; gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); /* Issue delayed requests. */ gv_plex_flush(p); } else { offset = bp->bio_offset + bp->bio_length; err = gv_grow_request(p, offset, MIN(bp->bio_length, origsize - offset), BIO_READ, NULL); } } g_destroy_bio(bp); if (err) { p->flags &= ~GV_PLEX_GROWING; G_VINUM_DEBUG(0, "error growing plex: error code %d", err); } } /* * Create an initialization BIO and send it off to the consumer. Assume that * we're given initialization data as parameter. */ void gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length) { struct gv_drive *d; struct g_consumer *cp; struct bio *bp, *cbp; KASSERT(s != NULL, ("gv_init_request: NULL s")); d = s->drive_sc; KASSERT(d != NULL, ("gv_init_request: NULL d")); cp = d->consumer; KASSERT(cp != NULL, ("gv_init_request: NULL cp")); bp = g_new_bio(); if (bp == NULL) { G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" " (drive offset %jd); out of memory", s->name, (intmax_t)s->initialized, (intmax_t)start); return; /* XXX: Error codes. */ } bp->bio_cmd = BIO_WRITE; bp->bio_data = data; bp->bio_done = gv_done; bp->bio_error = 0; bp->bio_length = length; bp->bio_pflags |= GV_BIO_INIT; bp->bio_offset = start; bp->bio_caller1 = s; /* Then ofcourse, we have to clone it. */ cbp = g_clone_bio(bp); if (cbp == NULL) { G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" " (drive offset %jd); out of memory", s->name, (intmax_t)s->initialized, (intmax_t)start); return; /* XXX: Error codes. 
*/ } cbp->bio_done = gv_done; cbp->bio_caller1 = s; /* Send it off to the consumer. */ g_io_request(cbp, cp); } /* * Handle a finished initialization BIO. */ void gv_init_complete(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; struct gv_drive *d; struct g_consumer *cp; struct gv_sd *s; off_t start, length; caddr_t data; int error; s = bp->bio_caller1; start = bp->bio_offset; length = bp->bio_length; error = bp->bio_error; data = bp->bio_data; KASSERT(s != NULL, ("gv_init_complete: NULL s")); d = s->drive_sc; KASSERT(d != NULL, ("gv_init_complete: NULL d")); cp = d->consumer; KASSERT(cp != NULL, ("gv_init_complete: NULL cp")); sc = p->vinumconf; KASSERT(sc != NULL, ("gv_init_complete: NULL sc")); g_destroy_bio(bp); /* * First we need to find out if it was okay, and abort if it's not. * Then we need to free previous buffers, find out the correct subdisk, * as well as getting the correct starting point and length of the BIO. */ if (start >= s->drive_offset + s->size) { /* Free the data we initialized. */ if (data != NULL) g_free(data); g_topology_assert_not(); g_topology_lock(); g_access(cp, 0, -1, 0); g_topology_unlock(); if (error) { gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); } else { gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG); s->initialized = 0; gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); G_VINUM_DEBUG(1, "subdisk '%s' init: finished " "successfully", s->name); } return; } s->initialized += length; start += length; gv_init_request(s, start, data, length); } /* * Create a new bio struct for the next parity rebuild. Used both by internal * rebuild of degraded plexes as well as user initiated rebuilds/checks. */ void gv_parity_request(struct gv_plex *p, int flags, off_t offset) { struct gv_softc *sc; struct bio *bp; KASSERT(p != NULL, ("gv_parity_request: NULL p")); sc = p->vinumconf; KASSERT(sc != NULL, ("gv_parity_request: NULL sc")); bp = g_new_bio(); if (bp == NULL) { G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: " "out of memory", p->name); return; } bp->bio_cmd = BIO_WRITE; bp->bio_done = gv_done; bp->bio_error = 0; bp->bio_length = p->stripesize; bp->bio_caller1 = p; /* * Check if it's a rebuild of a degraded plex or a user request of * parity rebuild. */ if (flags & GV_BIO_REBUILD) bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK); else if (flags & GV_BIO_CHECK) bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO); else { G_VINUM_DEBUG(0, "invalid flags given in rebuild"); return; } bp->bio_pflags = flags; bp->bio_pflags |= GV_BIO_MALLOC; /* We still have more parity to build. */ bp->bio_offset = offset; gv_post_bio(sc, bp); //gv_plex_start(p, bp); /* Send it down to the plex. */ } /* * Handle a finished parity write. */ void gv_parity_complete(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; int error, flags; error = bp->bio_error; flags = bp->bio_pflags; flags &= ~GV_BIO_MALLOC; sc = p->vinumconf; KASSERT(sc != NULL, ("gv_parity_complete: NULL sc")); /* Clean up what we allocated. */ if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); g_destroy_bio(bp); if (error == EAGAIN) { G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx", (intmax_t)p->synced); } /* Any error is fatal, except EAGAIN when we're rebuilding. */ if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) { /* Make sure we don't have the lock. 
*/ g_topology_assert_not(); g_topology_lock(); gv_access(p->vol_sc->provider, -1, -1, 0); g_topology_unlock(); G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx " "errno %d", p->name, (intmax_t)p->synced, error); return; } else { p->synced += p->stripesize; } if (p->synced >= p->size) { /* Make sure we don't have the lock. */ g_topology_assert_not(); g_topology_lock(); gv_access(p->vol_sc->provider, -1, -1, 0); g_topology_unlock(); /* We're finished. */ G_VINUM_DEBUG(1, "parity operation on %s finished", p->name); p->synced = 0; gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); return; } /* Send down next. It will determine if we need to itself. */ gv_parity_request(p, flags, p->synced); } /* * Handle a finished plex rebuild bio. */ void gv_rebuild_complete(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; struct gv_sd *s; int error, flags; off_t offset; error = bp->bio_error; flags = bp->bio_pflags; offset = bp->bio_offset; flags &= ~GV_BIO_MALLOC; sc = p->vinumconf; KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc")); /* Clean up what we allocated. */ if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); g_destroy_bio(bp); if (error) { g_topology_assert_not(); g_topology_lock(); gv_access(p->vol_sc->provider, -1, -1, 0); g_topology_unlock(); G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d", p->name, (intmax_t)offset, error); p->flags &= ~GV_PLEX_REBUILDING; p->synced = 0; gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ return; } offset += (p->stripesize * (gv_sdcount(p, 1) - 1)); if (offset >= p->size) { /* We're finished. */ g_topology_assert_not(); g_topology_lock(); gv_access(p->vol_sc->provider, -1, -1, 0); g_topology_unlock(); G_VINUM_DEBUG(1, "rebuild of %s finished", p->name); gv_save_config(p->vinumconf); p->flags &= ~GV_PLEX_REBUILDING; p->synced = 0; /* Try to up all subdisks. */ LIST_FOREACH(s, &p->subdisks, in_plex) gv_update_sd_state(s); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ return; } /* Send down next. It will determine if we need to itself. */ gv_parity_request(p, flags, offset); } Index: head/sys/geom/vinum/geom_vinum_raid5.c =================================================================== --- head/sys/geom/vinum/geom_vinum_raid5.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_raid5.c (revision 350694) @@ -1,663 +1,664 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include #include #include #include static int gv_raid5_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *, int *, int *, int); static struct bio * gv_raid5_clone_bio(struct bio *, struct gv_sd *, struct gv_raid5_packet *, caddr_t, int); static int gv_raid5_request(struct gv_plex *, struct gv_raid5_packet *, struct bio *, caddr_t, off_t, off_t, int *); static int gv_raid5_check(struct gv_plex *, struct gv_raid5_packet *, struct bio *, caddr_t, off_t, off_t); static int gv_raid5_rebuild(struct gv_plex *, struct gv_raid5_packet *, struct bio *, caddr_t, off_t, off_t); struct gv_raid5_packet * gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount) { struct bio *cbp; struct gv_raid5_packet *wp, *wp2; struct gv_bioq *bq, *bq2; int err, delay; delay = 0; wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO); wp->bio = bp; wp->waiting = NULL; wp->parity = NULL; TAILQ_INIT(&wp->bits); if (bp->bio_pflags & GV_BIO_REBUILD) err = gv_raid5_rebuild(p, wp, bp, addr, boff, bcount); else if (bp->bio_pflags & GV_BIO_CHECK) err = gv_raid5_check(p, wp, bp, addr, boff, bcount); else err = gv_raid5_request(p, wp, bp, addr, boff, bcount, &delay); /* Means we have a delayed request. */ if (delay) { g_free(wp); return (NULL); } /* * Building the sub-request failed, we probably need to clean up a lot. */ if (err) { G_VINUM_LOGREQ(0, bp, "raid5 plex request failed."); TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { TAILQ_REMOVE(&wp->bits, bq, queue); g_free(bq); } if (wp->waiting != NULL) { if (wp->waiting->bio_cflags & GV_BIO_MALLOC) g_free(wp->waiting->bio_data); g_destroy_bio(wp->waiting); } if (wp->parity != NULL) { if (wp->parity->bio_cflags & GV_BIO_MALLOC) g_free(wp->parity->bio_data); g_destroy_bio(wp->parity); } g_free(wp); TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) { if (wp->bio != bp) continue; TAILQ_REMOVE(&p->packets, wp, list); TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { TAILQ_REMOVE(&wp->bits, bq, queue); g_free(bq); } g_free(wp); } cbp = bioq_takefirst(p->bqueue); while (cbp != NULL) { if (cbp->bio_cflags & GV_BIO_MALLOC) g_free(cbp->bio_data); g_destroy_bio(cbp); cbp = bioq_takefirst(p->bqueue); } /* If internal, stop and reset state. */ if (bp->bio_pflags & GV_BIO_INTERNAL) { if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); g_destroy_bio(bp); /* Reset flags. */ p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING | GV_PLEX_GROWING); return (NULL); } g_io_deliver(bp, err); return (NULL); } return (wp); } /* * Check if the stripe that the work packet wants is already being used by * some other work packet. 
*/ int gv_stripe_active(struct gv_plex *p, struct bio *bp) { struct gv_raid5_packet *wp, *owp; int overlap; wp = bp->bio_caller2; if (wp->lockbase == -1) return (0); overlap = 0; TAILQ_FOREACH(owp, &p->packets, list) { if (owp == wp) break; if ((wp->lockbase >= owp->lockbase) && (wp->lockbase <= owp->lockbase + owp->length)) { overlap++; break; } if ((wp->lockbase <= owp->lockbase) && (wp->lockbase + wp->length >= owp->lockbase)) { overlap++; break; } } return (overlap); } static int gv_raid5_check(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr, off_t boff, off_t bcount) { struct gv_sd *parity, *s; struct gv_bioq *bq; struct bio *cbp; int i, psdno; off_t real_len, real_off; if (p == NULL || LIST_EMPTY(&p->subdisks)) return (ENXIO); gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1); /* Find the right subdisk. */ parity = NULL; i = 0; LIST_FOREACH(s, &p->subdisks, in_plex) { if (i == psdno) { parity = s; break; } i++; } /* Parity stripe not found. */ if (parity == NULL) return (ENXIO); if (parity->state != GV_SD_UP) return (ENXIO); wp->length = real_len; wp->data = addr; wp->lockbase = real_off; /* Read all subdisks. */ LIST_FOREACH(s, &p->subdisks, in_plex) { /* Skip the parity subdisk. */ if (s == parity) continue; /* Skip growing subdisks. */ if (s->flags & GV_SD_GROW) continue; cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); cbp->bio_cmd = BIO_READ; bioq_insert_tail(p->bqueue, cbp); bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO); bq->bp = cbp; TAILQ_INSERT_TAIL(&wp->bits, bq, queue); } /* Read the parity data. */ cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); cbp->bio_cmd = BIO_READ; wp->waiting = cbp; /* * In case we want to rebuild the parity, create an extra BIO to write * it out. It also acts as buffer for the XOR operations. */ cbp = gv_raid5_clone_bio(bp, parity, wp, addr, 1); if (cbp == NULL) return (ENOMEM); wp->parity = cbp; return (0); } /* Rebuild a degraded RAID5 plex. */ static int gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr, off_t boff, off_t bcount) { struct gv_sd *broken, *s; struct gv_bioq *bq; struct bio *cbp; off_t real_len, real_off; if (p == NULL || LIST_EMPTY(&p->subdisks)) return (ENXIO); gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1); /* Find the right subdisk. */ broken = NULL; LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->state != GV_SD_UP) broken = s; } /* Broken stripe not found. */ if (broken == NULL) return (ENXIO); switch (broken->state) { case GV_SD_UP: return (EINVAL); case GV_SD_STALE: if (!(bp->bio_pflags & GV_BIO_REBUILD)) return (ENXIO); G_VINUM_DEBUG(1, "sd %s is reviving", broken->name); gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE); /* Set this bit now, but should be set at end. */ broken->flags |= GV_SD_CANGOUP; break; case GV_SD_REVIVING: break; default: /* All other subdisk states mean it's not accessible. */ return (ENXIO); } wp->length = real_len; wp->data = addr; wp->lockbase = real_off; KASSERT(wp->length >= 0, ("gv_rebuild_raid5: wp->length < 0")); /* Read all subdisks. */ LIST_FOREACH(s, &p->subdisks, in_plex) { /* Skip the broken subdisk. */ if (s == broken) continue; /* Skip growing subdisks. 
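/*
 * A minimal sketch, not part of this change: the interval test at the heart
 * of gv_stripe_active() above.  Two RAID5 work packets collide when their
 * locked ranges [base, base + length] touch or overlap; the real function
 * walks p->packets and only compares against packets queued earlier, so the
 * older packet always wins and the newer one is parked on the wait queue.
 */
#include <stdint.h>

static int
ranges_overlap(int64_t base1, int64_t len1, int64_t base2, int64_t len2)
{
	/* Same closed-interval comparison as the two if() blocks above. */
	if (base1 >= base2 && base1 <= base2 + len2)
		return (1);
	if (base1 <= base2 && base1 + len1 >= base2)
		return (1);
	return (0);
}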
*/ if (s->flags & GV_SD_GROW) continue; cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); cbp->bio_cmd = BIO_READ; bioq_insert_tail(p->bqueue, cbp); bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO); bq->bp = cbp; TAILQ_INSERT_TAIL(&wp->bits, bq, queue); } /* Write the parity data. */ cbp = gv_raid5_clone_bio(bp, broken, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); wp->parity = cbp; p->synced = boff; /* Post notification that we're finished. */ return (0); } /* Build a request group to perform (part of) a RAID5 request. */ static int gv_raid5_request(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr, off_t boff, off_t bcount, int *delay) { struct g_geom *gp; struct gv_sd *broken, *original, *parity, *s; struct gv_bioq *bq; struct bio *cbp; int i, psdno, sdno, type, grow; off_t real_len, real_off; gp = bp->bio_to->geom; if (p == NULL || LIST_EMPTY(&p->subdisks)) return (ENXIO); /* We are optimistic and assume that this request will be OK. */ #define REQ_TYPE_NORMAL 0 #define REQ_TYPE_DEGRADED 1 #define REQ_TYPE_NOPARITY 2 type = REQ_TYPE_NORMAL; original = parity = broken = NULL; /* XXX: The resize won't crash with rebuild or sync, but we should still * be aware of it. Also this should perhaps be done on rebuild/check as * well? */ /* If we're over, we must use the old. */ if (boff >= p->synced) { grow = 1; /* Or if over the resized offset, we use all drives. */ } else if (boff + bcount <= p->synced) { grow = 0; /* Else, we're in the middle, and must wait a bit. */ } else { bioq_disksort(p->rqueue, bp); *delay = 1; return (0); } gv_raid5_offset(p, boff, bcount, &real_off, &real_len, &sdno, &psdno, grow); /* Find the right subdisks. */ i = 0; LIST_FOREACH(s, &p->subdisks, in_plex) { if (i == sdno) original = s; if (i == psdno) parity = s; if (s->state != GV_SD_UP) broken = s; i++; } if ((original == NULL) || (parity == NULL)) return (ENXIO); /* Our data stripe is missing. */ if (original->state != GV_SD_UP) type = REQ_TYPE_DEGRADED; /* If synchronizing request, just write it if disks are stale. */ if (original->state == GV_SD_STALE && parity->state == GV_SD_STALE && bp->bio_pflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) { type = REQ_TYPE_NORMAL; /* Our parity stripe is missing. */ } else if (parity->state != GV_SD_UP) { /* We cannot take another failure if we're already degraded. */ if (type != REQ_TYPE_NORMAL) return (ENXIO); else type = REQ_TYPE_NOPARITY; } wp->length = real_len; wp->data = addr; wp->lockbase = real_off; KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0")); if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len < p->synced)) type = REQ_TYPE_NORMAL; if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len >= p->synced)) { bioq_disksort(p->rqueue, bp); *delay = 1; return (0); } switch (bp->bio_cmd) { case BIO_READ: /* * For a degraded read we need to read in all stripes except * the broken one plus the parity stripe and then recalculate * the desired data. */ if (type == REQ_TYPE_DEGRADED) { bzero(wp->data, wp->length); LIST_FOREACH(s, &p->subdisks, in_plex) { /* Skip the broken subdisk. */ if (s == broken) continue; /* Skip growing if within offset. */ if (grow && s->flags & GV_SD_GROW) continue; cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); bioq_insert_tail(p->bqueue, cbp); bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO); bq->bp = cbp; TAILQ_INSERT_TAIL(&wp->bits, bq, queue); } /* A normal read can be fulfilled with the original subdisk. 
*/ } else { cbp = gv_raid5_clone_bio(bp, original, wp, addr, 0); if (cbp == NULL) return (ENOMEM); bioq_insert_tail(p->bqueue, cbp); } wp->lockbase = -1; break; case BIO_WRITE: /* * A degraded write means we cannot write to the original data * subdisk. Thus we need to read in all valid stripes, * recalculate the parity from the original data, and then * write the parity stripe back out. */ if (type == REQ_TYPE_DEGRADED) { /* Read all subdisks. */ LIST_FOREACH(s, &p->subdisks, in_plex) { /* Skip the broken and the parity subdisk. */ if ((s == broken) || (s == parity)) continue; /* Skip growing if within offset. */ if (grow && s->flags & GV_SD_GROW) continue; cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); cbp->bio_cmd = BIO_READ; bioq_insert_tail(p->bqueue, cbp); bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO); bq->bp = cbp; TAILQ_INSERT_TAIL(&wp->bits, bq, queue); } /* Write the parity data. */ cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); bcopy(addr, cbp->bio_data, wp->length); wp->parity = cbp; /* * When the parity stripe is missing we just write out the data. */ } else if (type == REQ_TYPE_NOPARITY) { cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1); if (cbp == NULL) return (ENOMEM); bioq_insert_tail(p->bqueue, cbp); bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO); bq->bp = cbp; TAILQ_INSERT_TAIL(&wp->bits, bq, queue); /* * A normal write request goes to the original subdisk, then we * read in all other stripes, recalculate the parity and write * out the parity again. */ } else { /* Read old parity. */ cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); cbp->bio_cmd = BIO_READ; bioq_insert_tail(p->bqueue, cbp); bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO); bq->bp = cbp; TAILQ_INSERT_TAIL(&wp->bits, bq, queue); /* Read old data. */ cbp = gv_raid5_clone_bio(bp, original, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); cbp->bio_cmd = BIO_READ; bioq_insert_tail(p->bqueue, cbp); bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO); bq->bp = cbp; TAILQ_INSERT_TAIL(&wp->bits, bq, queue); /* Write new data. */ cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1); if (cbp == NULL) return (ENOMEM); /* * We must not write the new data until the old data * was read, so hold this BIO back until we're ready * for it. */ wp->waiting = cbp; /* The final bio for the parity. */ cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1); if (cbp == NULL) return (ENOMEM); /* Remember that this is the BIO for the parity data. */ wp->parity = cbp; } break; default: return (EINVAL); } return (0); } /* * Calculate the offsets in the various subdisks for a RAID5 request. Also take * care of new subdisks in an expanded RAID5 array. * XXX: This assumes that the new subdisks are inserted after the others (which * is okay as long as plex_offset is larger). If subdisks are inserted into the * plexlist before, we get problems. */ static int gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off, off_t *real_len, int *sdno, int *psdno, int growing) { struct gv_sd *s; int sd, psd, sdcount; off_t len_left, stripeend, stripeoff, stripestart; sdcount = p->sdcount; if (growing) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) sdcount--; } } /* The number of the subdisk containing the parity stripe. */ psd = sdcount - 1 - ( boff / (p->stripesize * (sdcount - 1))) % sdcount; KASSERT(psdno >= 0, ("gv_raid5_offset: psdno < 0")); /* Offset of the start address from the start of the stripe. 
*/ stripeoff = boff % (p->stripesize * (sdcount - 1)); KASSERT(stripeoff >= 0, ("gv_raid5_offset: stripeoff < 0")); /* The number of the subdisk where the stripe resides. */ sd = stripeoff / p->stripesize; KASSERT(sdno >= 0, ("gv_raid5_offset: sdno < 0")); /* At or past parity subdisk. */ if (sd >= psd) sd++; /* The offset of the stripe on this subdisk. */ stripestart = (boff - stripeoff) / (sdcount - 1); KASSERT(stripestart >= 0, ("gv_raid5_offset: stripestart < 0")); stripeoff %= p->stripesize; /* The offset of the request on this subdisk. */ *real_off = stripestart + stripeoff; stripeend = stripestart + p->stripesize; len_left = stripeend - *real_off; KASSERT(len_left >= 0, ("gv_raid5_offset: len_left < 0")); *real_len = (bcount <= len_left) ? bcount : len_left; if (sdno != NULL) *sdno = sd; if (psdno != NULL) *psdno = psd; return (0); } static struct bio * gv_raid5_clone_bio(struct bio *bp, struct gv_sd *s, struct gv_raid5_packet *wp, caddr_t addr, int use_wp) { struct bio *cbp; cbp = g_clone_bio(bp); if (cbp == NULL) return (NULL); if (addr == NULL) { cbp->bio_data = g_malloc(wp->length, M_WAITOK | M_ZERO); cbp->bio_cflags |= GV_BIO_MALLOC; } else cbp->bio_data = addr; cbp->bio_offset = wp->lockbase + s->drive_offset; cbp->bio_length = wp->length; cbp->bio_done = gv_done; cbp->bio_caller1 = s; if (use_wp) cbp->bio_caller2 = wp; return (cbp); } Index: head/sys/geom/vinum/geom_vinum_rename.c =================================================================== --- head/sys/geom/vinum/geom_vinum_rename.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_rename.c (revision 350694) @@ -1,263 +1,264 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Chris Jones * All rights reserved. * * This software was developed for the FreeBSD Project by Chris Jones * thanks to the support of Google's Summer of Code program and * mentoring by Lukas Ertl. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
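/*
 * A minimal sketch, not part of this change: the RAID5 mapping computed by
 * gv_raid5_offset() in geom_vinum_raid5.c above, in isolation -- which
 * subdisk holds the data, which holds the parity for that stripe row, and
 * where on the data subdisk the request starts.  The parity column rotates
 * backwards by one subdisk per full stripe row.
 */
#include <stdint.h>

struct raid5_loc {
	int	sdno;		/* data subdisk */
	int	psdno;		/* parity subdisk for this stripe row */
	int64_t	sd_off;		/* byte offset on the data subdisk */
};

static struct raid5_loc
raid5_map(int64_t boff, int64_t stripesize, int sdcount)
{
	struct raid5_loc loc;
	int64_t rowsize, stripeoff, stripestart;

	rowsize = stripesize * (sdcount - 1);	/* data bytes per stripe row */
	loc.psdno = sdcount - 1 - (int)((boff / rowsize) % sdcount);
	stripeoff = boff % rowsize;		/* offset within the row */
	loc.sdno = (int)(stripeoff / stripesize);
	if (loc.sdno >= loc.psdno)		/* skip over the parity column */
		loc.sdno++;
	stripestart = (boff - stripeoff) / (sdcount - 1);
	loc.sd_off = stripestart + stripeoff % stripesize;
	return (loc);
}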
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include +#include #include #include void gv_rename(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; struct gv_drive *d; char *newname, *object, *name; int *flags, type; sc = gp->softc; flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "no flags given"); return; } newname = gctl_get_param(req, "newname", NULL); if (newname == NULL) { gctl_error(req, "no new name given"); return; } object = gctl_get_param(req, "object", NULL); if (object == NULL) { gctl_error(req, "no object given"); return; } type = gv_object_type(sc, object); switch (type) { case GV_TYPE_VOL: v = gv_find_vol(sc, object); if (v == NULL) { gctl_error(req, "unknown volume '%s'", object); return; } name = g_malloc(GV_MAXVOLNAME, M_WAITOK | M_ZERO); strlcpy(name, newname, GV_MAXVOLNAME); gv_post_event(sc, GV_EVENT_RENAME_VOL, v, name, *flags, 0); break; case GV_TYPE_PLEX: p = gv_find_plex(sc, object); if (p == NULL) { gctl_error(req, "unknown plex '%s'", object); return; } name = g_malloc(GV_MAXPLEXNAME, M_WAITOK | M_ZERO); strlcpy(name, newname, GV_MAXPLEXNAME); gv_post_event(sc, GV_EVENT_RENAME_PLEX, p, name, *flags, 0); break; case GV_TYPE_SD: s = gv_find_sd(sc, object); if (s == NULL) { gctl_error(req, "unknown subdisk '%s'", object); return; } name = g_malloc(GV_MAXSDNAME, M_WAITOK | M_ZERO); strlcpy(name, newname, GV_MAXSDNAME); gv_post_event(sc, GV_EVENT_RENAME_SD, s, name, *flags, 0); break; case GV_TYPE_DRIVE: d = gv_find_drive(sc, object); if (d == NULL) { gctl_error(req, "unknown drive '%s'", object); return; } name = g_malloc(GV_MAXDRIVENAME, M_WAITOK | M_ZERO); strlcpy(name, newname, GV_MAXDRIVENAME); gv_post_event(sc, GV_EVENT_RENAME_DRIVE, d, name, *flags, 0); break; default: gctl_error(req, "unknown object '%s'", object); return; } } int gv_rename_drive(struct gv_softc *sc, struct gv_drive *d, char *newname, int flags) { struct gv_sd *s; KASSERT(d != NULL, ("gv_rename_drive: NULL d")); if (gv_object_type(sc, newname) != GV_ERR_NOTFOUND) { G_VINUM_DEBUG(1, "drive name '%s' already in use", newname); return (GV_ERR_NAMETAKEN); } strlcpy(d->name, newname, sizeof(d->name)); if (d->hdr != NULL) strlcpy(d->hdr->label.name, newname, sizeof(d->hdr->label.name)); LIST_FOREACH(s, &d->subdisks, from_drive) strlcpy(s->drive, d->name, sizeof(s->drive)); return (0); } int gv_rename_plex(struct gv_softc *sc, struct gv_plex *p, char *newname, int flags) { char newsd[GV_MAXSDNAME]; struct gv_sd *s; char *ptr; int err; KASSERT(p != NULL, ("gv_rename_plex: NULL p")); if (gv_object_type(sc, newname) != GV_ERR_NOTFOUND) { G_VINUM_DEBUG(1, "plex name '%s' already in use", newname); return (GV_ERR_NAMETAKEN); } /* * Locate the plex number part of the plex names. * XXX: might be a good idea to sanitize input a bit more */ ptr = strrchr(newname, '.'); if (ptr == NULL) { G_VINUM_DEBUG(0, "proposed plex name '%s' is not a valid plex " "name", newname); return (GV_ERR_INVNAME); } strlcpy(p->name, newname, sizeof(p->name)); /* Fix up references and potentially rename subdisks. */ LIST_FOREACH(s, &p->subdisks, in_plex) { strlcpy(s->plex, p->name, sizeof(s->plex)); if (flags & GV_FLAG_R) { /* * Look for the two last dots in the string, and assume * that the old value was ok. 
*/ ptr = strrchr(s->name, '.'); if (ptr == NULL) return (GV_ERR_INVNAME); ptr++; snprintf(newsd, sizeof(newsd), "%s.%s", p->name, ptr); err = gv_rename_sd(sc, s, newsd, flags); if (err) return (err); } } return (0); } /* * gv_rename_sd: renames a subdisk. Note that the 'flags' argument is ignored, * since there are no structures below a subdisk. Similarly, we don't have to * clean up any references elsewhere to the subdisk's name. */ int gv_rename_sd(struct gv_softc *sc, struct gv_sd *s, char *newname, int flags) { char *dot1, *dot2; KASSERT(s != NULL, ("gv_rename_sd: NULL s")); if (gv_object_type(sc, newname) != GV_ERR_NOTFOUND) { G_VINUM_DEBUG(1, "subdisk name %s already in use", newname); return (GV_ERR_NAMETAKEN); } /* Locate the sd number part of the sd names. */ dot1 = strchr(newname, '.'); if (dot1 == NULL || (dot2 = strchr(dot1 + 1, '.')) == NULL) { G_VINUM_DEBUG(0, "proposed sd name '%s' is not a valid sd name", newname); return (GV_ERR_INVNAME); } strlcpy(s->name, newname, sizeof(s->name)); return (0); } int gv_rename_vol(struct gv_softc *sc, struct gv_volume *v, char *newname, int flags) { struct g_provider *pp; struct gv_plex *p; char newplex[GV_MAXPLEXNAME], *ptr; int err; KASSERT(v != NULL, ("gv_rename_vol: NULL v")); pp = v->provider; KASSERT(pp != NULL, ("gv_rename_vol: NULL pp")); if (gv_object_type(sc, newname) != GV_ERR_NOTFOUND) { G_VINUM_DEBUG(1, "volume name %s already in use", newname); return (GV_ERR_NAMETAKEN); } /* Rename the volume. */ strlcpy(v->name, newname, sizeof(v->name)); /* Fix up references and potentially rename plexes. */ LIST_FOREACH(p, &v->plexes, in_volume) { strlcpy(p->volume, v->name, sizeof(p->volume)); if (flags & GV_FLAG_R) { /* * Look for the last dot in the string, and assume that * the old value was ok. */ ptr = strrchr(p->name, '.'); ptr++; snprintf(newplex, sizeof(newplex), "%s.%s", v->name, ptr); err = gv_rename_plex(sc, p, newplex, flags); if (err) return (err); } } return (0); } Index: head/sys/geom/vinum/geom_vinum_rm.c =================================================================== --- head/sys/geom/vinum/geom_vinum_rm.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_rm.c (revision 350694) @@ -1,389 +1,390 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
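/*
 * A minimal sketch, not part of this change: how gv_rename_plex() and
 * gv_rename_vol() in geom_vinum_rename.c above derive child names on a
 * recursive rename.  gvinum names are dot-separated ("vol.pN.sM"); only the
 * suffix after the last dot is kept and the new parent name is prepended.
 * derive_child_name() is a hypothetical helper, not a gvinum function.
 */
#include <stdio.h>
#include <string.h>

static int
derive_child_name(char *out, size_t outlen, const char *newparent,
    const char *oldchild)
{
	const char *suffix;

	suffix = strrchr(oldchild, '.');	/* e.g. ".s0" of "vol.p0.s0" */
	if (suffix == NULL)
		return (-1);			/* not a valid gvinum name */
	snprintf(out, outlen, "%s%s", newparent, suffix);
	return (0);
}

/* derive_child_name(buf, sizeof(buf), "vol.p1", "vol.p0.s0") yields "vol.p1.s0". */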
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include +#include #include #include /* General 'remove' routine. */ void gv_remove(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; struct gv_drive *d; int *argc, *flags; char *argv, buf[20]; int i, type; argc = gctl_get_paraml(req, "argc", sizeof(*argc)); if (argc == NULL || *argc == 0) { gctl_error(req, "no arguments given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "no flags given"); return; } sc = gp->softc; /* XXX config locking */ for (i = 0; i < *argc; i++) { snprintf(buf, sizeof(buf), "argv%d", i); argv = gctl_get_param(req, buf, NULL); if (argv == NULL) continue; type = gv_object_type(sc, argv); switch (type) { case GV_TYPE_VOL: v = gv_find_vol(sc, argv); /* * If this volume has plexes, we want a recursive * removal. */ if (!LIST_EMPTY(&v->plexes) && !(*flags & GV_FLAG_R)) { gctl_error(req, "volume '%s' has attached " "plexes - need recursive removal", v->name); return; } gv_post_event(sc, GV_EVENT_RM_VOLUME, v, NULL, 0, 0); break; case GV_TYPE_PLEX: p = gv_find_plex(sc, argv); /* * If this plex has subdisks, we want a recursive * removal. */ if (!LIST_EMPTY(&p->subdisks) && !(*flags & GV_FLAG_R)) { gctl_error(req, "plex '%s' has attached " "subdisks - need recursive removal", p->name); return; } /* Don't allow removal of the only plex of a volume. */ if (p->vol_sc != NULL && p->vol_sc->plexcount == 1) { gctl_error(req, "plex '%s' is still attached " "to volume '%s'", p->name, p->volume); return; } gv_post_event(sc, GV_EVENT_RM_PLEX, p, NULL, 0, 0); break; case GV_TYPE_SD: s = gv_find_sd(sc, argv); /* Don't allow removal if attached to a plex. */ if (s->plex_sc != NULL) { gctl_error(req, "subdisk '%s' is still attached" " to plex '%s'", s->name, s->plex_sc->name); return; } gv_post_event(sc, GV_EVENT_RM_SD, s, NULL, 0, 0); break; case GV_TYPE_DRIVE: d = gv_find_drive(sc, argv); /* We don't allow to remove open drives. */ if (gv_consumer_is_open(d->consumer) && !(*flags & GV_FLAG_F)) { gctl_error(req, "drive '%s' is open", d->name); return; } /* A drive with subdisks needs a recursive removal. */ /* if (!LIST_EMPTY(&d->subdisks) && !(*flags & GV_FLAG_R)) { gctl_error(req, "drive '%s' still has subdisks" " - need recursive removal", d->name); return; }*/ gv_post_event(sc, GV_EVENT_RM_DRIVE, d, NULL, *flags, 0); break; default: gctl_error(req, "unknown object '%s'", argv); return; } } gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } /* Resets configuration */ int gv_resetconfig(struct gv_softc *sc) { struct gv_drive *d, *d2; struct gv_volume *v, *v2; struct gv_plex *p, *p2; struct gv_sd *s, *s2; /* First make sure nothing is open. */ LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) { if (gv_consumer_is_open(d->consumer)) { return (GV_ERR_ISBUSY); } } /* Make sure nothing is going on internally. 
*/ LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) { if (p->flags & (GV_PLEX_REBUILDING | GV_PLEX_GROWING)) return (GV_ERR_ISBUSY); } /* Then if not, we remove everything. */ LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) gv_rm_sd(sc, s); LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) gv_rm_drive(sc, d, 0); LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) gv_rm_plex(sc, p); LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) gv_rm_vol(sc, v); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); return (0); } /* Remove a volume. */ void gv_rm_vol(struct gv_softc *sc, struct gv_volume *v) { struct g_provider *pp; struct gv_plex *p, *p2; KASSERT(v != NULL, ("gv_rm_vol: NULL v")); pp = v->provider; KASSERT(pp != NULL, ("gv_rm_vol: NULL pp")); /* Check if any of our consumers is open. */ if (gv_provider_is_open(pp)) { G_VINUM_DEBUG(0, "unable to remove %s: volume still in use", v->name); return; } /* Remove the plexes our volume has. */ LIST_FOREACH_SAFE(p, &v->plexes, in_volume, p2) gv_rm_plex(sc, p); /* Clean up. */ LIST_REMOVE(v, volume); g_free(v); /* Get rid of the volume's provider. */ if (pp != NULL) { g_topology_lock(); g_wither_provider(pp, ENXIO); g_topology_unlock(); } } /* Remove a plex. */ void gv_rm_plex(struct gv_softc *sc, struct gv_plex *p) { struct gv_volume *v; struct gv_sd *s, *s2; KASSERT(p != NULL, ("gv_rm_plex: NULL p")); v = p->vol_sc; /* Check if any of our consumers is open. */ if (v != NULL && gv_provider_is_open(v->provider) && v->plexcount < 2) { G_VINUM_DEBUG(0, "unable to remove %s: volume still in use", p->name); return; } /* Remove the subdisks our plex has. */ LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) gv_rm_sd(sc, s); v = p->vol_sc; /* Clean up and let our geom fade away. */ LIST_REMOVE(p, plex); if (p->vol_sc != NULL) { p->vol_sc->plexcount--; LIST_REMOVE(p, in_volume); p->vol_sc = NULL; /* Correctly update the volume size. */ gv_update_vol_size(v, gv_vol_size(v)); } g_free(p); } /* Remove a subdisk. */ void gv_rm_sd(struct gv_softc *sc, struct gv_sd *s) { struct gv_plex *p; struct gv_volume *v; KASSERT(s != NULL, ("gv_rm_sd: NULL s")); p = s->plex_sc; v = NULL; /* Clean up. */ if (p != NULL) { LIST_REMOVE(s, in_plex); s->plex_sc = NULL; p->sdcount--; /* Update the plexsize. */ p->size = gv_plex_size(p); v = p->vol_sc; if (v != NULL) { /* Update the size of our plex' volume. */ gv_update_vol_size(v, gv_vol_size(v)); } } if (s->drive_sc && !(s->drive_sc->flags & GV_DRIVE_REFERENCED)) LIST_REMOVE(s, from_drive); LIST_REMOVE(s, sd); gv_free_sd(s); g_free(s); } /* Remove a drive. */ void gv_rm_drive(struct gv_softc *sc, struct gv_drive *d, int flags) { struct g_consumer *cp; struct gv_freelist *fl, *fl2; struct gv_plex *p; struct gv_sd *s, *s2; struct gv_volume *v; struct gv_drive *d2; int err; KASSERT(d != NULL, ("gv_rm_drive: NULL d")); cp = d->consumer; if (cp != NULL) { g_topology_lock(); err = g_access(cp, 0, 1, 0); g_topology_unlock(); if (err) { G_VINUM_DEBUG(0, "%s: unable to access '%s', " "errno: %d", __func__, cp->provider->name, err); return; } /* Clear the Vinum Magic. */ d->hdr->magic = GV_NOMAGIC; err = gv_write_header(cp, d->hdr); if (err) G_VINUM_DEBUG(0, "gv_rm_drive: error writing header to" " '%s', errno: %d", cp->provider->name, err); g_topology_lock(); g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); } /* Remove all associated subdisks, plexes, volumes. 
*/ if (flags & GV_FLAG_R) { if (!LIST_EMPTY(&d->subdisks)) { LIST_FOREACH_SAFE(s, &d->subdisks, from_drive, s2) { p = s->plex_sc; if (p != NULL) { v = p->vol_sc; if (v != NULL) gv_rm_vol(sc, v); } } } } /* Clean up. */ LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) { LIST_REMOVE(fl, freelist); g_free(fl); } LIST_REMOVE(d, drive); g_free(d->hdr); /* Put ourself into referenced state if we have subdisks. */ if (d->sdcount > 0) { d->consumer = NULL; d->hdr = NULL; d->flags |= GV_DRIVE_REFERENCED; snprintf(d->device, sizeof(d->device), "???"); d->size = 0; d->avail = 0; d->freelist_entries = 0; LIST_FOREACH(s, &d->subdisks, from_drive) { s->flags |= GV_SD_TASTED; gv_set_sd_state(s, GV_SD_DOWN, GV_SETSTATE_FORCE); } /* Shuffle around so we keep gv_is_newer happy. */ LIST_REMOVE(d, drive); d2 = LIST_FIRST(&sc->drives); if (d2 == NULL) LIST_INSERT_HEAD(&sc->drives, d, drive); else LIST_INSERT_AFTER(d2, d, drive); return; } g_free(d); gv_save_config(sc); } Index: head/sys/geom/vinum/geom_vinum_state.c =================================================================== --- head/sys/geom/vinum/geom_vinum_state.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_state.c (revision 350694) @@ -1,536 +1,537 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include void gv_setstate(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_sd *s; struct gv_drive *d; struct gv_volume *v; struct gv_plex *p; char *obj, *state; int f, *flags, type; f = 0; obj = gctl_get_param(req, "object", NULL); if (obj == NULL) { gctl_error(req, "no object given"); return; } state = gctl_get_param(req, "state", NULL); if (state == NULL) { gctl_error(req, "no state given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "no flags given"); return; } if (*flags & GV_FLAG_F) f = GV_SETSTATE_FORCE; sc = gp->softc; type = gv_object_type(sc, obj); switch (type) { case GV_TYPE_VOL: if (gv_volstatei(state) < 0) { gctl_error(req, "invalid volume state '%s'", state); break; } v = gv_find_vol(sc, obj); gv_post_event(sc, GV_EVENT_SET_VOL_STATE, v, NULL, gv_volstatei(state), f); break; case GV_TYPE_PLEX: if (gv_plexstatei(state) < 0) { gctl_error(req, "invalid plex state '%s'", state); break; } p = gv_find_plex(sc, obj); gv_post_event(sc, GV_EVENT_SET_PLEX_STATE, p, NULL, gv_plexstatei(state), f); break; case GV_TYPE_SD: if (gv_sdstatei(state) < 0) { gctl_error(req, "invalid subdisk state '%s'", state); break; } s = gv_find_sd(sc, obj); gv_post_event(sc, GV_EVENT_SET_SD_STATE, s, NULL, gv_sdstatei(state), f); break; case GV_TYPE_DRIVE: if (gv_drivestatei(state) < 0) { gctl_error(req, "invalid drive state '%s'", state); break; } d = gv_find_drive(sc, obj); gv_post_event(sc, GV_EVENT_SET_DRIVE_STATE, d, NULL, gv_drivestatei(state), f); break; default: gctl_error(req, "unknown object '%s'", obj); break; } } /* Update drive state; return 0 if the state changes, otherwise error. */ int gv_set_drive_state(struct gv_drive *d, int newstate, int flags) { struct gv_sd *s; int oldstate; KASSERT(d != NULL, ("gv_set_drive_state: NULL d")); oldstate = d->state; if (newstate == oldstate) return (0); /* We allow to take down an open drive only with force. */ if ((newstate == GV_DRIVE_DOWN) && gv_consumer_is_open(d->consumer) && (!(flags & GV_SETSTATE_FORCE))) return (GV_ERR_ISBUSY); d->state = newstate; if (d->state != oldstate) { LIST_FOREACH(s, &d->subdisks, from_drive) gv_update_sd_state(s); } /* Save the config back to disk. */ if (flags & GV_SETSTATE_CONFIG) gv_save_config(d->vinumconf); return (0); } int gv_set_sd_state(struct gv_sd *s, int newstate, int flags) { struct gv_drive *d; struct gv_plex *p; int oldstate, status; KASSERT(s != NULL, ("gv_set_sd_state: NULL s")); oldstate = s->state; /* We are optimistic and assume it will work. */ status = 0; if (newstate == oldstate) return (0); switch (newstate) { case GV_SD_DOWN: /* * If we're attached to a plex, we won't go down without use of * force. */ if ((s->plex_sc != NULL) && !(flags & GV_SETSTATE_FORCE)) return (GV_ERR_ISATTACHED); break; case GV_SD_REVIVING: case GV_SD_INITIALIZING: /* * Only do this if we're forced, since it usually is done * internally, and then we do use the force flag. */ if (!(flags & GV_SETSTATE_FORCE)) return (GV_ERR_SETSTATE); break; case GV_SD_UP: /* We can't bring the subdisk up if our drive is dead. */ d = s->drive_sc; if ((d == NULL) || (d->state != GV_DRIVE_UP)) return (GV_ERR_SETSTATE); /* Check from where we want to be brought up. */ switch (s->state) { case GV_SD_REVIVING: case GV_SD_INITIALIZING: /* * The subdisk was initializing. We allow it to be * brought up. */ break; case GV_SD_DOWN: /* * The subdisk is currently down. 
We allow it to be * brought up if it is not attached to a plex. */ p = s->plex_sc; if (p == NULL) break; /* * If this subdisk is attached to a plex, we allow it * to be brought up if the plex if it's not a RAID5 * plex, otherwise it's made 'stale'. */ if (p->org != GV_PLEX_RAID5) break; else if (s->flags & GV_SD_CANGOUP) { s->flags &= ~GV_SD_CANGOUP; break; } else if (flags & GV_SETSTATE_FORCE) break; else s->state = GV_SD_STALE; status = GV_ERR_SETSTATE; break; case GV_SD_STALE: /* * A stale subdisk can be brought up only if it's part * of a concat or striped plex that's the only one in a * volume, or if the subdisk isn't attached to a plex. * Otherwise it needs to be revived or initialized * first. */ p = s->plex_sc; if (p == NULL || flags & GV_SETSTATE_FORCE) break; if ((p->org != GV_PLEX_RAID5 && p->vol_sc->plexcount == 1) || (p->flags & GV_PLEX_SYNCING && p->synced > 0 && p->org == GV_PLEX_RAID5)) break; else return (GV_ERR_SETSTATE); default: return (GV_ERR_INVSTATE); } break; /* Other state transitions are only possible with force. */ default: if (!(flags & GV_SETSTATE_FORCE)) return (GV_ERR_SETSTATE); } /* We can change the state and do it. */ if (status == 0) s->state = newstate; /* Update our plex, if we're attached to one. */ if (s->plex_sc != NULL) gv_update_plex_state(s->plex_sc); /* Save the config back to disk. */ if (flags & GV_SETSTATE_CONFIG) gv_save_config(s->vinumconf); return (status); } int gv_set_plex_state(struct gv_plex *p, int newstate, int flags) { struct gv_volume *v; int oldstate, plexdown; KASSERT(p != NULL, ("gv_set_plex_state: NULL p")); oldstate = p->state; v = p->vol_sc; plexdown = 0; if (newstate == oldstate) return (0); switch (newstate) { case GV_PLEX_UP: /* Let update_plex handle if the plex can come up */ gv_update_plex_state(p); if (p->state != GV_PLEX_UP && !(flags & GV_SETSTATE_FORCE)) return (GV_ERR_SETSTATE); p->state = newstate; break; case GV_PLEX_DOWN: /* * Set state to GV_PLEX_DOWN only if no-one is using the plex, * or if the state is forced. */ if (v != NULL) { /* If the only one up, force is needed. */ plexdown = gv_plexdown(v); if ((v->plexcount == 1 || (v->plexcount - plexdown == 1)) && ((flags & GV_SETSTATE_FORCE) == 0)) return (GV_ERR_SETSTATE); } p->state = newstate; break; case GV_PLEX_DEGRADED: /* Only used internally, so we have to be forced. */ if (flags & GV_SETSTATE_FORCE) p->state = newstate; break; } /* Update our volume if we have one. */ if (v != NULL) gv_update_vol_state(v); /* Save config. */ if (flags & GV_SETSTATE_CONFIG) gv_save_config(p->vinumconf); return (0); } int gv_set_vol_state(struct gv_volume *v, int newstate, int flags) { int oldstate; KASSERT(v != NULL, ("gv_set_vol_state: NULL v")); oldstate = v->state; if (newstate == oldstate) return (0); switch (newstate) { case GV_VOL_UP: /* Let update handle if the volume can come up. */ gv_update_vol_state(v); if (v->state != GV_VOL_UP && !(flags & GV_SETSTATE_FORCE)) return (GV_ERR_SETSTATE); v->state = newstate; break; case GV_VOL_DOWN: /* * Set state to GV_VOL_DOWN only if no-one is using the volume, * or if the state should be forced. */ if (!gv_provider_is_open(v->provider) && !(flags & GV_SETSTATE_FORCE)) return (GV_ERR_ISBUSY); v->state = newstate; break; } /* Save config */ if (flags & GV_SETSTATE_CONFIG) gv_save_config(v->vinumconf); return (0); } /* Update the state of a subdisk based on its environment. 
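 * In short: a subdisk on a drive that is not up goes down; a freshly
 * created (GV_SD_NEWBORN) subdisk is assumed good and comes up;
 * anything else that is not already up turns stale unless
 * GV_SD_CANGOUP lets it come up.  A subdisk whose on-disk state was
 * anything but "up" therefore typically surfaces as GV_SD_STALE and
 * has to be revived or initialized before it carries data again.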
*/ void gv_update_sd_state(struct gv_sd *s) { struct gv_drive *d; int oldstate; KASSERT(s != NULL, ("gv_update_sd_state: NULL s")); d = s->drive_sc; KASSERT(d != NULL, ("gv_update_sd_state: NULL d")); oldstate = s->state; /* If our drive isn't up we cannot be up either. */ if (d->state != GV_DRIVE_UP) { s->state = GV_SD_DOWN; /* If this subdisk was just created, we assume it is good.*/ } else if (s->flags & GV_SD_NEWBORN) { s->state = GV_SD_UP; s->flags &= ~GV_SD_NEWBORN; } else if (s->state != GV_SD_UP) { if (s->flags & GV_SD_CANGOUP) { s->state = GV_SD_UP; s->flags &= ~GV_SD_CANGOUP; } else s->state = GV_SD_STALE; } else s->state = GV_SD_UP; if (s->state != oldstate) G_VINUM_DEBUG(1, "subdisk %s state change: %s -> %s", s->name, gv_sdstate(oldstate), gv_sdstate(s->state)); /* Update the plex, if we have one. */ if (s->plex_sc != NULL) gv_update_plex_state(s->plex_sc); } /* Update the state of a plex based on its environment. */ void gv_update_plex_state(struct gv_plex *p) { struct gv_sd *s; int sdstates; int oldstate; KASSERT(p != NULL, ("gv_update_plex_state: NULL p")); oldstate = p->state; /* First, check the state of our subdisks. */ sdstates = gv_sdstatemap(p); /* If all subdisks are up, our plex can be up, too. */ if (sdstates == GV_SD_UPSTATE) p->state = GV_PLEX_UP; /* One or more of our subdisks are down. */ else if (sdstates & GV_SD_DOWNSTATE) { /* A RAID5 plex can handle one dead subdisk. */ if ((p->org == GV_PLEX_RAID5) && (p->sddown == 1)) p->state = GV_PLEX_DEGRADED; else p->state = GV_PLEX_DOWN; /* Some of our subdisks are initializing. */ } else if (sdstates & GV_SD_INITSTATE) { if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING) p->state = GV_PLEX_DEGRADED; else p->state = GV_PLEX_DOWN; } else p->state = GV_PLEX_DOWN; if (p->state == GV_PLEX_UP) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) { p->state = GV_PLEX_GROWABLE; break; } } } if (p->state != oldstate) G_VINUM_DEBUG(1, "plex %s state change: %s -> %s", p->name, gv_plexstate(oldstate), gv_plexstate(p->state)); /* Update our volume, if we have one. */ if (p->vol_sc != NULL) gv_update_vol_state(p->vol_sc); } /* Update the volume state based on its plexes. */ void gv_update_vol_state(struct gv_volume *v) { struct gv_plex *p; KASSERT(v != NULL, ("gv_update_vol_state: NULL v")); /* The volume can't be up without plexes. */ if (v->plexcount == 0) { v->state = GV_VOL_DOWN; return; } LIST_FOREACH(p, &v->plexes, in_volume) { /* One of our plexes is accessible, and so are we. */ if (p->state > GV_PLEX_DEGRADED) { v->state = GV_VOL_UP; return; /* We can handle a RAID5 plex with one dead subdisk as well. */ } else if ((p->org == GV_PLEX_RAID5) && (p->state == GV_PLEX_DEGRADED)) { v->state = GV_VOL_UP; return; } } /* Not one of our plexes is up, so we can't be either. */ v->state = GV_VOL_DOWN; } /* Return a state map for the subdisks of a plex. */ int gv_sdstatemap(struct gv_plex *p) { struct gv_sd *s; int statemap; KASSERT(p != NULL, ("gv_sdstatemap: NULL p")); statemap = 0; p->sddown = 0; /* No subdisks down yet. */ LIST_FOREACH(s, &p->subdisks, in_plex) { switch (s->state) { case GV_SD_DOWN: case GV_SD_STALE: statemap |= GV_SD_DOWNSTATE; p->sddown++; /* Another unusable subdisk. */ break; case GV_SD_UP: statemap |= GV_SD_UPSTATE; break; case GV_SD_INITIALIZING: statemap |= GV_SD_INITSTATE; break; case GV_SD_REVIVING: statemap |= GV_SD_INITSTATE; p->sddown++; /* XXX: Another unusable subdisk? 
*/ break; } } return (statemap); } Index: head/sys/geom/vinum/geom_vinum_subr.c =================================================================== --- head/sys/geom/vinum/geom_vinum_subr.c (revision 350693) +++ head/sys/geom/vinum/geom_vinum_subr.c (revision 350694) @@ -1,1283 +1,1284 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2004, 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * Copyright (c) 1997, 1998, 1999 * Nan Yang Computer Services Limited. All rights reserved. * * Parts written by Greg Lehey * * This software is distributed under the so-called ``Berkeley * License'': * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Nan Yang Computer * Services Limited. * 4. Neither the name of the Company nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * This software is provided ``as is'', and any express or implied * warranties, including, but not limited to, the implied warranties of * merchantability and fitness for a particular purpose are disclaimed. * In no event shall the company or contributors be liable for any * direct, indirect, incidental, special, exemplary, or consequential * damages (including, but not limited to, procurement of substitute * goods or services; loss of use, data, or profits; or business * interruption) however caused and on any theory of liability, whether * in contract, strict liability, or tort (including negligence or * otherwise) arising in any way out of the use of this software, even if * advised of the possibility of such damage. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include #include #include int gv_drive_is_newer(struct gv_softc *, struct gv_drive *); static off_t gv_plex_smallest_sd(struct gv_plex *); void gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d) { char *aptr, *bptr, *cptr; struct gv_volume *v, *v2; struct gv_plex *p, *p2; struct gv_sd *s, *s2; int error, is_newer, tokens; char *token[GV_MAXARGS]; is_newer = gv_drive_is_newer(sc, d); /* Until the end of the string *buf. */ for (aptr = buf; *aptr != '\0'; aptr = bptr) { bptr = aptr; cptr = aptr; /* Separate input lines. 
*/ while (*bptr != '\n') bptr++; *bptr = '\0'; bptr++; tokens = gv_tokenize(cptr, token, GV_MAXARGS); if (tokens <= 0) continue; if (!strcmp(token[0], "volume")) { v = gv_new_volume(tokens, token); if (v == NULL) { G_VINUM_DEBUG(0, "config parse failed volume"); break; } v2 = gv_find_vol(sc, v->name); if (v2 != NULL) { if (is_newer) { v2->state = v->state; G_VINUM_DEBUG(2, "newer volume found!"); } g_free(v); continue; } gv_create_volume(sc, v); } else if (!strcmp(token[0], "plex")) { p = gv_new_plex(tokens, token); if (p == NULL) { G_VINUM_DEBUG(0, "config parse failed plex"); break; } p2 = gv_find_plex(sc, p->name); if (p2 != NULL) { /* XXX */ if (is_newer) { p2->state = p->state; G_VINUM_DEBUG(2, "newer plex found!"); } g_free(p); continue; } error = gv_create_plex(sc, p); if (error) continue; /* * These flags were set in gv_create_plex() and are not * needed here (on-disk config parsing). */ p->flags &= ~GV_PLEX_ADDED; } else if (!strcmp(token[0], "sd")) { s = gv_new_sd(tokens, token); if (s == NULL) { G_VINUM_DEBUG(0, "config parse failed subdisk"); break; } s2 = gv_find_sd(sc, s->name); if (s2 != NULL) { /* XXX */ if (is_newer) { s2->state = s->state; G_VINUM_DEBUG(2, "newer subdisk found!"); } g_free(s); continue; } /* * Signal that this subdisk was tasted, and could * possibly reference a drive that isn't in our config * yet. */ s->flags |= GV_SD_TASTED; if (s->state == GV_SD_UP) s->flags |= GV_SD_CANGOUP; error = gv_create_sd(sc, s); if (error) continue; /* * This flag was set in gv_create_sd() and is not * needed here (on-disk config parsing). */ s->flags &= ~GV_SD_NEWBORN; s->flags &= ~GV_SD_GROW; } } } /* * Format the vinum configuration properly. If ondisk is non-zero then the * configuration is intended to be written to disk later. */ void gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix) { struct gv_drive *d; struct gv_sd *s; struct gv_plex *p; struct gv_volume *v; /* * We don't need the drive configuration if we're not writing the * config to disk. */ if (!ondisk) { LIST_FOREACH(d, &sc->drives, drive) { sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix, d->name, d->device); } } LIST_FOREACH(v, &sc->volumes, volume) { if (!ondisk) sbuf_printf(sb, "%s", prefix); sbuf_printf(sb, "volume %s", v->name); if (ondisk) sbuf_printf(sb, " state %s", gv_volstate(v->state)); sbuf_printf(sb, "\n"); } LIST_FOREACH(p, &sc->plexes, plex) { if (!ondisk) sbuf_printf(sb, "%s", prefix); sbuf_printf(sb, "plex name %s org %s ", p->name, gv_plexorg(p->org)); if (gv_is_striped(p)) sbuf_printf(sb, "%ds ", p->stripesize / 512); if (p->vol_sc != NULL) sbuf_printf(sb, "vol %s", p->volume); if (ondisk) sbuf_printf(sb, " state %s", gv_plexstate(p->state)); sbuf_printf(sb, "\n"); } LIST_FOREACH(s, &sc->subdisks, sd) { if (!ondisk) sbuf_printf(sb, "%s", prefix); sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset " "%jds", s->name, s->drive, s->size / 512, s->drive_offset / 512); if (s->plex_sc != NULL) { sbuf_printf(sb, " plex %s plexoffset %jds", s->plex, s->plex_offset / 512); } if (ondisk) sbuf_printf(sb, " state %s", gv_sdstate(s->state)); sbuf_printf(sb, "\n"); } } static off_t gv_plex_smallest_sd(struct gv_plex *p) { struct gv_sd *s; off_t smallest; KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p")); s = LIST_FIRST(&p->subdisks); if (s == NULL) return (-1); smallest = s->size; LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->size < smallest) smallest = s->size; } return (smallest); } /* Walk over plexes in a volume and count how many are down. 
*/ int gv_plexdown(struct gv_volume *v) { int plexdown; struct gv_plex *p; KASSERT(v != NULL, ("gv_plexdown: NULL v")); plexdown = 0; LIST_FOREACH(p, &v->plexes, plex) { if (p->state == GV_PLEX_DOWN) plexdown++; } return (plexdown); } int gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p) { struct gv_sd *s2; off_t psizeorig, remainder, smallest; /* If this subdisk was already given to this plex, do nothing. */ if (s->plex_sc == p) return (0); /* Check correct size of this subdisk. */ s2 = LIST_FIRST(&p->subdisks); /* Adjust the subdisk-size if necessary. */ if (s2 != NULL && gv_is_striped(p)) { /* First adjust to the stripesize. */ remainder = s->size % p->stripesize; if (remainder) { G_VINUM_DEBUG(1, "size of sd %s is not a " "multiple of plex stripesize, taking off " "%jd bytes", s->name, (intmax_t)remainder); gv_adjust_freespace(s, remainder); } smallest = gv_plex_smallest_sd(p); /* Then take off extra if other subdisks are smaller. */ remainder = s->size - smallest; /* * Don't allow a remainder below zero for running plexes, it's too * painful, and if someone were to accidentally do this, the * resulting array might be smaller than the original... not god */ if (remainder < 0) { if (!(p->flags & GV_PLEX_NEWBORN)) { G_VINUM_DEBUG(0, "sd %s too small for plex %s!", s->name, p->name); return (GV_ERR_BADSIZE); } /* Adjust other subdisks. */ LIST_FOREACH(s2, &p->subdisks, in_plex) { G_VINUM_DEBUG(1, "size of sd %s is to big, " "taking off %jd bytes", s->name, (intmax_t)remainder); gv_adjust_freespace(s2, (remainder * -1)); } } else if (remainder > 0) { G_VINUM_DEBUG(1, "size of sd %s is to big, " "taking off %jd bytes", s->name, (intmax_t)remainder); gv_adjust_freespace(s, remainder); } } /* Find the correct plex offset for this subdisk, if needed. */ if (s->plex_offset == -1) { /* * First set it to 0 to catch the case where we had a detached * subdisk that didn't get any good offset. */ s->plex_offset = 0; if (p->sdcount) { LIST_FOREACH(s2, &p->subdisks, in_plex) { if (gv_is_striped(p)) s->plex_offset = p->sdcount * p->stripesize; else s->plex_offset = s2->plex_offset + s2->size; } } } /* There are no subdisks for this plex yet, just insert it. */ if (LIST_EMPTY(&p->subdisks)) { LIST_INSERT_HEAD(&p->subdisks, s, in_plex); /* Insert in correct order, depending on plex_offset. */ } else { LIST_FOREACH(s2, &p->subdisks, in_plex) { if (s->plex_offset < s2->plex_offset) { LIST_INSERT_BEFORE(s2, s, in_plex); break; } else if (LIST_NEXT(s2, in_plex) == NULL) { LIST_INSERT_AFTER(s2, s, in_plex); break; } } } s->plex_sc = p; /* Adjust the size of our plex. We check if the plex misses a subdisk, * so we don't make the plex smaller than it actually should be. */ psizeorig = p->size; p->size = gv_plex_size(p); /* Make sure the size is not changed. */ if (p->sddetached > 0) { if (p->size < psizeorig) { p->size = psizeorig; /* We make sure wee need another subdisk. */ if (p->sddetached == 1) p->sddetached++; } p->sddetached--; } else { if ((p->org == GV_PLEX_RAID5 || p->org == GV_PLEX_STRIPED) && !(p->flags & GV_PLEX_NEWBORN) && p->state == GV_PLEX_UP) { s->flags |= GV_SD_GROW; } p->sdcount++; } return (0); } void gv_update_vol_size(struct gv_volume *v, off_t size) { if (v == NULL) return; if (v->provider != NULL) { g_topology_lock(); v->provider->mediasize = size; g_topology_unlock(); } v->size = size; } /* Return how many subdisks that constitute the original plex. 
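 * With a non-zero 'growing' argument, subdisks flagged GV_SD_GROW are
 * left out of the count.  As an illustration: a striped plex being
 * grown from two subdisks to three still reports two here until the
 * grow finishes, so the size calculations below keep using the
 * original stripe geometry.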
*/ int gv_sdcount(struct gv_plex *p, int growing) { struct gv_sd *s; int sdcount; sdcount = p->sdcount; if (growing) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) sdcount--; } } return (sdcount); } /* Calculates the plex size. */ off_t gv_plex_size(struct gv_plex *p) { struct gv_sd *s; off_t size; int sdcount; KASSERT(p != NULL, ("gv_plex_size: NULL p")); /* Adjust the size of our plex. */ size = 0; sdcount = gv_sdcount(p, 1); switch (p->org) { case GV_PLEX_CONCAT: LIST_FOREACH(s, &p->subdisks, in_plex) size += s->size; break; case GV_PLEX_STRIPED: s = LIST_FIRST(&p->subdisks); size = ((s != NULL) ? (sdcount * s->size) : 0); break; case GV_PLEX_RAID5: s = LIST_FIRST(&p->subdisks); size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0); break; } return (size); } /* Returns the size of a volume. */ off_t gv_vol_size(struct gv_volume *v) { struct gv_plex *p; off_t minplexsize; KASSERT(v != NULL, ("gv_vol_size: NULL v")); p = LIST_FIRST(&v->plexes); if (p == NULL) return (0); minplexsize = p->size; LIST_FOREACH(p, &v->plexes, in_volume) { if (p->size < minplexsize) { minplexsize = p->size; } } return (minplexsize); } void gv_update_plex_config(struct gv_plex *p) { struct gv_sd *s, *s2; off_t remainder; int required_sds, state; KASSERT(p != NULL, ("gv_update_plex_config: NULL p")); /* The plex was added to an already running volume. */ if (p->flags & GV_PLEX_ADDED) gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); switch (p->org) { case GV_PLEX_STRIPED: required_sds = 2; break; case GV_PLEX_RAID5: required_sds = 3; break; case GV_PLEX_CONCAT: default: required_sds = 0; break; } if (required_sds) { if (p->sdcount < required_sds) { gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); } /* * The subdisks in striped plexes must all have the same size. */ s = LIST_FIRST(&p->subdisks); LIST_FOREACH(s2, &p->subdisks, in_plex) { if (s->size != s2->size) { G_VINUM_DEBUG(0, "subdisk size mismatch %s" "(%jd) <> %s (%jd)", s->name, s->size, s2->name, s2->size); gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); } } LIST_FOREACH(s, &p->subdisks, in_plex) { /* Trim subdisk sizes to match the stripe size. */ remainder = s->size % p->stripesize; if (remainder) { G_VINUM_DEBUG(1, "size of sd %s is not a " "multiple of plex stripesize, taking off " "%jd bytes", s->name, (intmax_t)remainder); gv_adjust_freespace(s, remainder); } } } p->size = gv_plex_size(p); if (p->sdcount == 0) gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) { LIST_FOREACH(s, &p->subdisks, in_plex) gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE); /* If added to a volume, we want the plex to be down. */ state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP; gv_set_plex_state(p, state, GV_SETSTATE_FORCE); p->flags &= ~GV_PLEX_ADDED; } else if (p->flags & GV_PLEX_ADDED) { LIST_FOREACH(s, &p->subdisks, in_plex) gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); p->flags &= ~GV_PLEX_ADDED; } else if (p->state == GV_PLEX_UP) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) { gv_set_plex_state(p, GV_PLEX_GROWABLE, GV_SETSTATE_FORCE); break; } } } /* Our plex is grown up now. */ p->flags &= ~GV_PLEX_NEWBORN; } /* * Give a subdisk to a drive, check and adjust several parameters, adjust * freelist. 
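 * Worked example with made-up numbers: given one free slot starting
 * at offset 1m with a size of 10m, placing a 4m subdisk at drive
 * offset 3m shrinks that slot to [1m, +2m) and inserts a new slot
 * [7m, +4m) behind the subdisk; placing the same subdisk at offset 1m
 * instead simply advances the slot to [5m, +6m).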
*/ int gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d) { struct gv_sd *s2; struct gv_freelist *fl, *fl2; off_t tmp; int i; fl2 = NULL; /* Shortcut for "referenced" drives. */ if (d->flags & GV_DRIVE_REFERENCED) { s->drive_sc = d; return (0); } /* Check if this subdisk was already given to this drive. */ if (s->drive_sc != NULL) { if (s->drive_sc == d) { if (!(s->flags & GV_SD_TASTED)) { return (0); } } else { G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' " "(already on '%s')", s->name, d->name, s->drive_sc->name); return (GV_ERR_ISATTACHED); } } /* Preliminary checks. */ if ((s->size > d->avail) || (d->freelist_entries == 0)) { G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name, s->name); return (GV_ERR_NOSPACE); } /* If no size was given for this subdisk, try to auto-size it... */ if (s->size == -1) { /* Find the largest available slot. */ LIST_FOREACH(fl, &d->freelist, freelist) { if (fl->size < s->size) continue; s->size = fl->size; s->drive_offset = fl->offset; fl2 = fl; } /* No good slot found? */ if (s->size == -1) { G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'", s->name, d->name); return (GV_ERR_BADSIZE); } /* * ... or check if we have a free slot that's large enough for the * given size. */ } else { i = 0; LIST_FOREACH(fl, &d->freelist, freelist) { if (fl->size < s->size) continue; /* Assign drive offset, if not given. */ if (s->drive_offset == -1) s->drive_offset = fl->offset; fl2 = fl; i++; break; } /* Couldn't find a good free slot. */ if (i == 0) { G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'", s->name, d->name); return (GV_ERR_NOSPACE); } } /* No drive offset given, try to calculate it. */ if (s->drive_offset == -1) { /* Add offsets and sizes from other subdisks on this drive. */ LIST_FOREACH(s2, &d->subdisks, from_drive) { s->drive_offset = s2->drive_offset + s2->size; } /* * If there are no other subdisks yet, then set the default * offset to GV_DATA_START. */ if (s->drive_offset == -1) s->drive_offset = GV_DATA_START; /* Check if we have a free slot at the given drive offset. */ } else { i = 0; LIST_FOREACH(fl, &d->freelist, freelist) { /* Yes, this subdisk fits. */ if ((fl->offset <= s->drive_offset) && (fl->offset + fl->size >= s->drive_offset + s->size)) { i++; fl2 = fl; break; } } /* Couldn't find a good free slot. */ if (i == 0) { G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit " "on '%s'", s->name, d->name); return (GV_ERR_NOSPACE); } } /* * Now that all parameters are checked and set up, we can give the * subdisk to the drive and adjust the freelist. */ /* First, adjust the freelist. */ LIST_FOREACH(fl, &d->freelist, freelist) { /* Look for the free slot that we have found before. */ if (fl != fl2) continue; /* The subdisk starts at the beginning of the free slot. */ if (fl->offset == s->drive_offset) { fl->offset += s->size; fl->size -= s->size; /* The subdisk uses the whole slot, so remove it. */ if (fl->size == 0) { d->freelist_entries--; LIST_REMOVE(fl, freelist); } /* * The subdisk does not start at the beginning of the free * slot. */ } else { tmp = fl->offset + fl->size; fl->size = s->drive_offset - fl->offset; /* * The subdisk didn't use the complete rest of the free * slot, so we need to split it. */ if (s->drive_offset + s->size != tmp) { fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO); fl2->offset = s->drive_offset + s->size; fl2->size = tmp - fl2->offset; LIST_INSERT_AFTER(fl, fl2, freelist); d->freelist_entries++; } } break; } /* * This is the first subdisk on this drive, just insert it into the * list. 
*/ if (LIST_EMPTY(&d->subdisks)) { LIST_INSERT_HEAD(&d->subdisks, s, from_drive); /* There are other subdisks, so insert this one in correct order. */ } else { LIST_FOREACH(s2, &d->subdisks, from_drive) { if (s->drive_offset < s2->drive_offset) { LIST_INSERT_BEFORE(s2, s, from_drive); break; } else if (LIST_NEXT(s2, from_drive) == NULL) { LIST_INSERT_AFTER(s2, s, from_drive); break; } } } d->sdcount++; d->avail -= s->size; s->flags &= ~GV_SD_TASTED; /* Link back from the subdisk to this drive. */ s->drive_sc = d; return (0); } void gv_free_sd(struct gv_sd *s) { struct gv_drive *d; struct gv_freelist *fl, *fl2; KASSERT(s != NULL, ("gv_free_sd: NULL s")); d = s->drive_sc; if (d == NULL) return; /* * First, find the free slot that's immediately before or after this * subdisk. */ fl = NULL; LIST_FOREACH(fl, &d->freelist, freelist) { if (fl->offset == s->drive_offset + s->size) break; if (fl->offset + fl->size == s->drive_offset) break; } /* If there is no free slot behind this subdisk, so create one. */ if (fl == NULL) { fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); fl->size = s->size; fl->offset = s->drive_offset; if (d->freelist_entries == 0) { LIST_INSERT_HEAD(&d->freelist, fl, freelist); } else { LIST_FOREACH(fl2, &d->freelist, freelist) { if (fl->offset < fl2->offset) { LIST_INSERT_BEFORE(fl2, fl, freelist); break; } else if (LIST_NEXT(fl2, freelist) == NULL) { LIST_INSERT_AFTER(fl2, fl, freelist); break; } } } d->freelist_entries++; /* Expand the free slot we just found. */ } else { fl->size += s->size; if (fl->offset > s->drive_offset) fl->offset = s->drive_offset; } d->avail += s->size; d->sdcount--; } void gv_adjust_freespace(struct gv_sd *s, off_t remainder) { struct gv_drive *d; struct gv_freelist *fl, *fl2; KASSERT(s != NULL, ("gv_adjust_freespace: NULL s")); d = s->drive_sc; KASSERT(d != NULL, ("gv_adjust_freespace: NULL d")); /* First, find the free slot that's immediately after this subdisk. */ fl = NULL; LIST_FOREACH(fl, &d->freelist, freelist) { if (fl->offset == s->drive_offset + s->size) break; } /* If there is no free slot behind this subdisk, so create one. */ if (fl == NULL) { fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); fl->size = remainder; fl->offset = s->drive_offset + s->size - remainder; if (d->freelist_entries == 0) { LIST_INSERT_HEAD(&d->freelist, fl, freelist); } else { LIST_FOREACH(fl2, &d->freelist, freelist) { if (fl->offset < fl2->offset) { LIST_INSERT_BEFORE(fl2, fl, freelist); break; } else if (LIST_NEXT(fl2, freelist) == NULL) { LIST_INSERT_AFTER(fl2, fl, freelist); break; } } } d->freelist_entries++; /* Expand the free slot we just found. */ } else { fl->offset -= remainder; fl->size += remainder; } s->size -= remainder; d->avail += remainder; } /* Check if the given plex is a striped one. */ int gv_is_striped(struct gv_plex *p) { KASSERT(p != NULL, ("gv_is_striped: NULL p")); switch(p->org) { case GV_PLEX_STRIPED: case GV_PLEX_RAID5: return (1); default: return (0); } } /* Find a volume by name. */ struct gv_volume * gv_find_vol(struct gv_softc *sc, char *name) { struct gv_volume *v; LIST_FOREACH(v, &sc->volumes, volume) { if (!strncmp(v->name, name, GV_MAXVOLNAME)) return (v); } return (NULL); } /* Find a plex by name. */ struct gv_plex * gv_find_plex(struct gv_softc *sc, char *name) { struct gv_plex *p; LIST_FOREACH(p, &sc->plexes, plex) { if (!strncmp(p->name, name, GV_MAXPLEXNAME)) return (p); } return (NULL); } /* Find a subdisk by name. 
*/ struct gv_sd * gv_find_sd(struct gv_softc *sc, char *name) { struct gv_sd *s; LIST_FOREACH(s, &sc->subdisks, sd) { if (!strncmp(s->name, name, GV_MAXSDNAME)) return (s); } return (NULL); } /* Find a drive by name. */ struct gv_drive * gv_find_drive(struct gv_softc *sc, char *name) { struct gv_drive *d; LIST_FOREACH(d, &sc->drives, drive) { if (!strncmp(d->name, name, GV_MAXDRIVENAME)) return (d); } return (NULL); } /* Find a drive given a device. */ struct gv_drive * gv_find_drive_device(struct gv_softc *sc, char *device) { struct gv_drive *d; LIST_FOREACH(d, &sc->drives, drive) { if(!strcmp(d->device, device)) return (d); } return (NULL); } /* Check if any consumer of the given geom is open. */ int gv_consumer_is_open(struct g_consumer *cp) { if (cp == NULL) return (0); if (cp->acr || cp->acw || cp->ace) return (1); return (0); } int gv_provider_is_open(struct g_provider *pp) { if (pp == NULL) return (0); if (pp->acr || pp->acw || pp->ace) return (1); return (0); } /* * Compare the modification dates of the drives. * Return 1 if a > b, 0 otherwise. */ int gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d) { struct gv_drive *d2; struct timeval *a, *b; KASSERT(!LIST_EMPTY(&sc->drives), ("gv_is_drive_newer: empty drive list")); a = &d->hdr->label.last_update; LIST_FOREACH(d2, &sc->drives, drive) { if ((d == d2) || (d2->state != GV_DRIVE_UP) || (d2->hdr == NULL)) continue; b = &d2->hdr->label.last_update; if (timevalcmp(a, b, >)) return (1); } return (0); } /* Return the type of object identified by string 'name'. */ int gv_object_type(struct gv_softc *sc, char *name) { struct gv_drive *d; struct gv_plex *p; struct gv_sd *s; struct gv_volume *v; LIST_FOREACH(v, &sc->volumes, volume) { if (!strncmp(v->name, name, GV_MAXVOLNAME)) return (GV_TYPE_VOL); } LIST_FOREACH(p, &sc->plexes, plex) { if (!strncmp(p->name, name, GV_MAXPLEXNAME)) return (GV_TYPE_PLEX); } LIST_FOREACH(s, &sc->subdisks, sd) { if (!strncmp(s->name, name, GV_MAXSDNAME)) return (GV_TYPE_SD); } LIST_FOREACH(d, &sc->drives, drive) { if (!strncmp(d->name, name, GV_MAXDRIVENAME)) return (GV_TYPE_DRIVE); } return (GV_ERR_NOTFOUND); } void gv_setup_objects(struct gv_softc *sc) { struct g_provider *pp; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; struct gv_drive *d; LIST_FOREACH(s, &sc->subdisks, sd) { d = gv_find_drive(sc, s->drive); if (d != NULL) gv_sd_to_drive(s, d); p = gv_find_plex(sc, s->plex); if (p != NULL) gv_sd_to_plex(s, p); gv_update_sd_state(s); } LIST_FOREACH(p, &sc->plexes, plex) { gv_update_plex_config(p); v = gv_find_vol(sc, p->volume); if (v != NULL && p->vol_sc != v) { p->vol_sc = v; v->plexcount++; LIST_INSERT_HEAD(&v->plexes, p, in_volume); } gv_update_plex_config(p); } LIST_FOREACH(v, &sc->volumes, volume) { v->size = gv_vol_size(v); if (v->provider == NULL) { g_topology_lock(); pp = g_new_providerf(sc->geom, "gvinum/%s", v->name); pp->mediasize = v->size; pp->sectorsize = 512; /* XXX */ g_error_provider(pp, 0); v->provider = pp; pp->private = v; g_topology_unlock(); } else if (v->provider->mediasize != v->size) { g_topology_lock(); v->provider->mediasize = v->size; g_topology_unlock(); } v->flags &= ~GV_VOL_NEWBORN; gv_update_vol_state(v); } } void gv_cleanup(struct gv_softc *sc) { struct gv_volume *v, *v2; struct gv_plex *p, *p2; struct gv_sd *s, *s2; struct gv_drive *d, *d2; struct gv_freelist *fl, *fl2; mtx_lock(&sc->config_mtx); LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) { LIST_REMOVE(v, volume); g_free(v->wqueue); g_free(v); } LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) { 
LIST_REMOVE(p, plex); g_free(p->bqueue); g_free(p->rqueue); g_free(p->wqueue); g_free(p); } LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) { LIST_REMOVE(s, sd); g_free(s); } LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) { LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) { LIST_REMOVE(fl, freelist); g_free(fl); } LIST_REMOVE(d, drive); g_free(d->hdr); g_free(d); } mtx_destroy(&sc->config_mtx); } /* General 'attach' routine. */ int gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename) { struct gv_sd *s; struct gv_softc *sc; g_topology_assert(); sc = p->vinumconf; KASSERT(sc != NULL, ("NULL sc")); if (p->vol_sc != NULL) { G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s", p->name, p->volume); return (GV_ERR_ISATTACHED); } /* Stale all subdisks of this plex. */ LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->state != GV_SD_STALE) gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); } /* Attach to volume. Make sure volume is not up and running. */ if (gv_provider_is_open(v->provider)) { G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy", p->name, v->name); return (GV_ERR_ISBUSY); } p->vol_sc = v; strlcpy(p->volume, v->name, sizeof(p->volume)); v->plexcount++; if (rename) { snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, v->plexcount); } LIST_INSERT_HEAD(&v->plexes, p, in_volume); /* Get plex up again. */ gv_update_vol_size(v, gv_vol_size(v)); gv_set_plex_state(p, GV_PLEX_UP, 0); gv_save_config(p->vinumconf); return (0); } int gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename) { struct gv_sd *s2; int error, sdcount; g_topology_assert(); /* If subdisk is attached, don't do it. */ if (s->plex_sc != NULL) { G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s", s->name, s->plex); return (GV_ERR_ISATTACHED); } gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); /* First check that this subdisk has a correct offset. If none other * starts at the same, and it's correct module stripesize, it is */ if (offset != -1 && offset % p->stripesize != 0) return (GV_ERR_BADOFFSET); LIST_FOREACH(s2, &p->subdisks, in_plex) { if (s2->plex_offset == offset) return (GV_ERR_BADOFFSET); } /* Attach the subdisk to the plex at given offset. */ s->plex_offset = offset; strlcpy(s->plex, p->name, sizeof(s->plex)); sdcount = p->sdcount; error = gv_sd_to_plex(s, p); if (error) return (error); gv_update_plex_config(p); if (rename) { snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex, p->sdcount); } if (p->vol_sc != NULL) gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc)); gv_save_config(p->vinumconf); /* We don't update the subdisk state since the user might have to * initiate a rebuild/sync first. */ return (0); } /* Detach a plex from a volume. */ int gv_detach_plex(struct gv_plex *p, int flags) { struct gv_volume *v; g_topology_assert(); v = p->vol_sc; if (v == NULL) { G_VINUM_DEBUG(1, "unable to detach %s: already detached", p->name); return (0); /* Not an error. */ } /* * Only proceed if forced or volume inactive. */ if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) || p->state == GV_PLEX_UP)) { G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy", p->name, p->volume); return (GV_ERR_ISBUSY); } v->plexcount--; /* Make sure someone don't read us when gone. */ v->last_read_plex = NULL; LIST_REMOVE(p, in_volume); p->vol_sc = NULL; memset(p->volume, 0, GV_MAXVOLNAME); gv_update_vol_size(v, gv_vol_size(v)); gv_save_config(p->vinumconf); return (0); } /* Detach a subdisk from a plex. 
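 * Unless GV_FLAG_F is given, detaching is refused while the plex is
 * still serving data (up, or degraded with this subdisk up).  From
 * the usual gvinum(8) front end a forced detach would look roughly
 * like
 *     gvinum detach -f myvol.p0.s0
 * where the subdisk name is purely illustrative.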
*/ int gv_detach_sd(struct gv_sd *s, int flags) { struct gv_plex *p; g_topology_assert(); p = s->plex_sc; if (p == NULL) { G_VINUM_DEBUG(1, "unable to detach %s: already detached", s->name); return (0); /* Not an error. */ } /* * Don't proceed if we're not forcing, and the plex is up, or degraded * with this subdisk up. */ if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) || ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) { G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy", s->name, s->plex); return (GV_ERR_ISBUSY); } LIST_REMOVE(s, in_plex); s->plex_sc = NULL; memset(s->plex, 0, GV_MAXPLEXNAME); p->sddetached++; gv_save_config(s->vinumconf); return (0); } Index: head/sys/geom/virstor/g_virstor.c =================================================================== --- head/sys/geom/virstor/g_virstor.c (revision 350693) +++ head/sys/geom/virstor/g_virstor.c (revision 350694) @@ -1,1894 +1,1895 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006-2007 Ivan Voras * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Implementation notes: * - "Components" are wrappers around providers that make up the * virtual storage (i.e. a virstor has "physical" components) */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include FEATURE(g_virstor, "GEOM virtual storage support"); /* Declare malloc(9) label */ static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data"); /* GEOM class methods */ static g_init_t g_virstor_init; static g_fini_t g_virstor_fini; static g_taste_t g_virstor_taste; static g_ctl_req_t g_virstor_config; static g_ctl_destroy_geom_t g_virstor_destroy_geom; /* Declare & initialize class structure ("geom class") */ struct g_class g_virstor_class = { .name = G_VIRSTOR_CLASS_NAME, .version = G_VERSION, .init = g_virstor_init, .fini = g_virstor_fini, .taste = g_virstor_taste, .ctlreq = g_virstor_config, .destroy_geom = g_virstor_destroy_geom /* The .dumpconf and the rest are only usable for a geom instance, so * they will be set when such instance is created. 
*/ }; /* Declare sysctl's and loader tunables */ SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, virstor, CTLFLAG_RW, 0, "GEOM_GVIRSTOR information"); static u_int g_virstor_debug = 2; /* XXX: lower to 2 when released to public */ SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RWTUN, &g_virstor_debug, 0, "Debug level (2=production, 5=normal, 15=excessive)"); static u_int g_virstor_chunk_watermark = 100; SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RWTUN, &g_virstor_chunk_watermark, 0, "Minimum number of free chunks before issuing administrative warning"); static u_int g_virstor_component_watermark = 1; SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RWTUN, &g_virstor_component_watermark, 0, "Minimum number of free components before issuing administrative warning"); static int read_metadata(struct g_consumer *, struct g_virstor_metadata *); static void write_metadata(struct g_consumer *, struct g_virstor_metadata *); static int clear_metadata(struct g_virstor_component *); static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *, struct g_virstor_metadata *); static struct g_geom *create_virstor_geom(struct g_class *, struct g_virstor_metadata *); static void virstor_check_and_run(struct g_virstor_softc *); static u_int virstor_valid_components(struct g_virstor_softc *); static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t, boolean_t); static void remove_component(struct g_virstor_softc *, struct g_virstor_component *, boolean_t); static void bioq_dismantle(struct bio_queue_head *); static int allocate_chunk(struct g_virstor_softc *, struct g_virstor_component **, u_int *, u_int *); static void delay_destroy_consumer(void *, int); static void dump_component(struct g_virstor_component *comp); #if 0 static void dump_me(struct virstor_map_entry *me, unsigned int nr); #endif static void virstor_ctl_stop(struct gctl_req *, struct g_class *); static void virstor_ctl_add(struct gctl_req *, struct g_class *); static void virstor_ctl_remove(struct gctl_req *, struct g_class *); static struct g_virstor_softc * virstor_find_geom(const struct g_class *, const char *); static void update_metadata(struct g_virstor_softc *); static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *, u_int, u_int); static void g_virstor_orphan(struct g_consumer *); static int g_virstor_access(struct g_provider *, int, int, int); static void g_virstor_start(struct bio *); static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *, struct g_consumer *, struct g_provider *); static void g_virstor_done(struct bio *); static void invalid_call(void); /* * Initialise GEOM class (per-class callback) */ static void g_virstor_init(struct g_class *mp __unused) { /* Catch map struct size mismatch at compile time; Map entries must * fit into MAXPHYS exactly, with no wasted space. 
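 * As a rough illustration: with the common 128 kB MAXPHYS and an
 * assumed 16-byte map entry, VIRSTOR_MAP_BLOCK_ENTRIES would have to
 * be 8192 for the assertion below to hold; any other combination
 * breaks the build instead of silently wasting map space.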
*/ CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS); /* Init UMA zones, TAILQ's, other global vars */ } /* * Finalise GEOM class (per-class callback) */ static void g_virstor_fini(struct g_class *mp __unused) { /* Deinit UMA zones & global vars */ } /* * Config (per-class callback) */ static void g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "Failed to get 'version' argument"); return; } if (*version != G_VIRSTOR_VERSION) { gctl_error(req, "Userland and kernel versions out of sync"); return; } g_topology_unlock(); if (strcmp(verb, "add") == 0) virstor_ctl_add(req, cp); else if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) virstor_ctl_stop(req, cp); else if (strcmp(verb, "remove") == 0) virstor_ctl_remove(req, cp); else gctl_error(req, "unknown verb: '%s'", verb); g_topology_lock(); } /* * "stop" verb from userland */ static void virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) { int *force, *nargs; int i; nargs = gctl_get_paraml(req, "nargs", sizeof *nargs); if (nargs == NULL) { gctl_error(req, "Error fetching argument '%s'", "nargs"); return; } if (*nargs < 1) { gctl_error(req, "Invalid number of arguments"); return; } force = gctl_get_paraml(req, "force", sizeof *force); if (force == NULL) { gctl_error(req, "Error fetching argument '%s'", "force"); return; } g_topology_lock(); for (i = 0; i < *nargs; i++) { char param[8]; const char *name; struct g_virstor_softc *sc; int error; sprintf(param, "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); g_topology_unlock(); return; } sc = virstor_find_geom(cp, name); if (sc == NULL) { gctl_error(req, "Don't know anything about '%s'", name); g_topology_unlock(); return; } LOG_MSG(LVL_INFO, "Stopping %s by the userland command", sc->geom->name); update_metadata(sc); if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) { LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d", sc->geom->name, error); } } g_topology_unlock(); } /* * "add" verb from userland - add new component(s) to the structure. * This will be done all at once in here, without going through the * .taste function for new components. */ static void virstor_ctl_add(struct gctl_req *req, struct g_class *cp) { /* Note: while this is going on, I/O is being done on * the g_up and g_down threads. The idea is to make changes * to softc members in a way that can atomically activate * them all at once. 
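 * From userland this path is normally reached with something like
 *     gvirstor add myvirstor /dev/ada2
 * (names are illustrative).  The code below prepares each new
 * component completely and only then bumps n_components, so the I/O
 * path never observes a half-initialised entry.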
*/ struct g_virstor_softc *sc; int *hardcode, *nargs; const char *geom_name; /* geom to add a component to */ struct g_consumer *fcp; struct g_virstor_bio_q *bq; u_int added; int error; int i; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "Error fetching argument '%s'", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Invalid number of arguments"); return; } hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); if (hardcode == NULL) { gctl_error(req, "Error fetching argument '%s'", "hardcode"); return; } /* Find "our" geom */ geom_name = gctl_get_asciiparam(req, "arg0"); if (geom_name == NULL) { gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)"); return; } sc = virstor_find_geom(cp, geom_name); if (sc == NULL) { gctl_error(req, "Don't know anything about '%s'", geom_name); return; } if (virstor_valid_components(sc) != sc->n_components) { LOG_MSG(LVL_ERROR, "Cannot add components to incomplete " "virstor %s", sc->geom->name); gctl_error(req, "Virstor %s is incomplete", sc->geom->name); return; } fcp = sc->components[0].gcons; added = 0; g_topology_lock(); for (i = 1; i < *nargs; i++) { struct g_virstor_metadata md; char aname[8]; const char *prov_name; struct g_provider *pp; struct g_consumer *cp; u_int nc; u_int j; snprintf(aname, sizeof aname, "arg%d", i); prov_name = gctl_get_asciiparam(req, aname); if (prov_name == NULL) { gctl_error(req, "Error fetching argument '%s'", aname); g_topology_unlock(); return; } if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) prov_name += sizeof(_PATH_DEV) - 1; pp = g_provider_by_name(prov_name); if (pp == NULL) { /* This is the most common error so be verbose about it */ if (added != 0) { gctl_error(req, "Invalid provider: '%s' (added" " %u components)", prov_name, added); update_metadata(sc); } else { gctl_error(req, "Invalid provider: '%s'", prov_name); } g_topology_unlock(); return; } cp = g_new_consumer(sc->geom); if (cp == NULL) { gctl_error(req, "Cannot create consumer"); g_topology_unlock(); return; } error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach a consumer to %s", pp->name); g_destroy_consumer(cp); g_topology_unlock(); return; } if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { gctl_error(req, "Access request failed for %s", pp->name); g_destroy_consumer(cp); g_topology_unlock(); return; } } if (fcp->provider->sectorsize != pp->sectorsize) { gctl_error(req, "Sector size doesn't fit for %s", pp->name); g_destroy_consumer(cp); g_topology_unlock(); return; } for (j = 0; j < sc->n_components; j++) { if (strcmp(sc->components[j].gcons->provider->name, pp->name) == 0) { gctl_error(req, "Component %s already in %s", pp->name, sc->geom->name); g_destroy_consumer(cp); g_topology_unlock(); return; } } sc->components = realloc(sc->components, sizeof(*sc->components) * (sc->n_components + 1), M_GVIRSTOR, M_WAITOK); nc = sc->n_components; sc->components[nc].gcons = cp; sc->components[nc].sc = sc; sc->components[nc].index = nc; sc->components[nc].chunk_count = cp->provider->mediasize / sc->chunk_size; sc->components[nc].chunk_next = 0; sc->components[nc].chunk_reserved = 0; if (sc->components[nc].chunk_count < 4) { gctl_error(req, "Provider too small: %s", cp->provider->name); g_destroy_consumer(cp); g_topology_unlock(); return; } fill_metadata(sc, &md, nc, *hardcode); write_metadata(cp, &md); /* The new component becomes visible when n_components is * incremented */ 
sc->n_components++; added++; } /* This call to update_metadata() is critical. In case there's a * power failure in the middle of it and some components are updated * while others are not, there will be trouble on next .taste() iff * a non-updated component is detected first */ update_metadata(sc); g_topology_unlock(); LOG_MSG(LVL_INFO, "Added %d component(s) to %s", added, sc->geom->name); /* Fire off BIOs previously queued because there wasn't any * physical space left. If the BIOs still can't be satisfied * they will again be added to the end of the queue (during * which the mutex will be recursed) */ bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK); bq->bio = NULL; mtx_lock(&sc->delayed_bio_q_mtx); /* First, insert a sentinel to the queue end, so we don't * end up in an infinite loop if there's still no free * space available. */ STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage); while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { bq = STAILQ_FIRST(&sc->delayed_bio_q); if (bq->bio != NULL) { g_virstor_start(bq->bio); STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); free(bq, M_GVIRSTOR); } else { STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); free(bq, M_GVIRSTOR); break; } } mtx_unlock(&sc->delayed_bio_q_mtx); } /* * Find a geom handled by the class */ static struct g_virstor_softc * virstor_find_geom(const struct g_class *cp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &cp->geom, geom) { if (strcmp(name, gp->name) == 0) return (gp->softc); } return (NULL); } /* * Update metadata on all components to reflect the current state * of these fields: * - chunk_next * - flags * - md_count * Expects things to be set up so write_metadata() can work, i.e. * the topology lock must be held. */ static void update_metadata(struct g_virstor_softc *sc) { struct g_virstor_metadata md; u_int n; if (virstor_valid_components(sc) != sc->n_components) return; /* Incomplete device */ LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s", sc->geom->name); /* Update metadata on components */ g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, sc->geom->class->name, sc->geom->name); g_topology_assert(); for (n = 0; n < sc->n_components; n++) { read_metadata(sc->components[n].gcons, &md); md.chunk_next = sc->components[n].chunk_next; md.flags = sc->components[n].flags; md.md_count = sc->n_components; write_metadata(sc->components[n].gcons, &md); } } /* * Fills metadata (struct md) from information stored in softc and the nc'th * component of virstor */ static void fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, u_int nc, u_int hardcode) { struct g_virstor_component *c; bzero(md, sizeof *md); c = &sc->components[nc]; strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic); md->md_version = G_VIRSTOR_VERSION; strncpy(md->md_name, sc->geom->name, sizeof md->md_name); md->md_id = sc->id; md->md_virsize = sc->virsize; md->md_chunk_size = sc->chunk_size; md->md_count = sc->n_components; if (hardcode) { strncpy(md->provider, c->gcons->provider->name, sizeof md->provider); } md->no = nc; md->provsize = c->gcons->provider->mediasize; md->chunk_count = c->chunk_count; md->chunk_next = c->chunk_next; md->chunk_reserved = c->chunk_reserved; md->flags = c->flags; } /* * Remove a component from virstor device. * Can only be done if the component is unallocated. */ static void virstor_ctl_remove(struct gctl_req *req, struct g_class *cp) { /* As this is executed in parallel to I/O, operations on virstor * structures must be as atomic as possible. 
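 * The matching userland request would be along the lines of
 *     gvirstor remove myvirstor /dev/ada2
 * (again illustrative names).  Only components without allocated
 * chunks may be taken out, and the component array is replaced with a
 * single pointer assignment so concurrent allocations never see a
 * torn table.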
*/ struct g_virstor_softc *sc; int *nargs; const char *geom_name; u_int removed; int i; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "Error fetching argument '%s'", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Invalid number of arguments"); return; } /* Find "our" geom */ geom_name = gctl_get_asciiparam(req, "arg0"); if (geom_name == NULL) { gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)"); return; } sc = virstor_find_geom(cp, geom_name); if (sc == NULL) { gctl_error(req, "Don't know anything about '%s'", geom_name); return; } if (virstor_valid_components(sc) != sc->n_components) { LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete " "virstor %s", sc->geom->name); gctl_error(req, "Virstor %s is incomplete", sc->geom->name); return; } removed = 0; for (i = 1; i < *nargs; i++) { char param[8]; const char *prov_name; int j, found; struct g_virstor_component *newcomp, *compbak; sprintf(param, "arg%d", i); prov_name = gctl_get_asciiparam(req, param); if (prov_name == NULL) { gctl_error(req, "Error fetching argument '%s'", param); return; } if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) prov_name += sizeof(_PATH_DEV) - 1; found = -1; for (j = 0; j < sc->n_components; j++) { if (strcmp(sc->components[j].gcons->provider->name, prov_name) == 0) { found = j; break; } } if (found == -1) { LOG_MSG(LVL_ERROR, "No %s component in %s", prov_name, sc->geom->name); continue; } compbak = sc->components; newcomp = malloc(sc->n_components * sizeof(*sc->components), M_GVIRSTOR, M_WAITOK | M_ZERO); bcopy(sc->components, newcomp, found * sizeof(*sc->components)); bcopy(&sc->components[found + 1], newcomp + found, found * sizeof(*sc->components)); if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) { LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be " "removed from %s", prov_name, sc->geom->name); free(newcomp, M_GVIRSTOR); /* We'll consider this non-fatal error */ continue; } /* Renumerate unallocated components */ for (j = 0; j < sc->n_components-1; j++) { if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) == 0) { sc->components[j].index = j; } } /* This is the critical section. If a component allocation * event happens while both variables are not yet set, * there will be trouble. Something will panic on encountering * NULL sc->components[x].gcomp member. * Luckily, component allocation happens very rarely and * removing components is an abnormal action in any case. */ sc->components = newcomp; sc->n_components--; /* End critical section */ g_topology_lock(); if (clear_metadata(&compbak[found]) != 0) { LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear " "metadata on %s", prov_name); } g_detach(compbak[found].gcons); g_destroy_consumer(compbak[found].gcons); g_topology_unlock(); free(compbak, M_GVIRSTOR); removed++; } /* This call to update_metadata() is critical. 
In case there's a * power failure in the middle of it and some components are updated * while others are not, there will be trouble on next .taste() iff * a non-updated component is detected first */ g_topology_lock(); update_metadata(sc); g_topology_unlock(); LOG_MSG(LVL_INFO, "Removed %d component(s) from %s", removed, sc->geom->name); } /* * Clear metadata sector on component */ static int clear_metadata(struct g_virstor_component *comp) { char *buf; int error; LOG_MSG(LVL_INFO, "Clearing metadata on %s", comp->gcons->provider->name); g_topology_assert(); error = g_access(comp->gcons, 0, 1, 0); if (error != 0) return (error); buf = malloc(comp->gcons->provider->sectorsize, M_GVIRSTOR, M_WAITOK | M_ZERO); error = g_write_data(comp->gcons, comp->gcons->provider->mediasize - comp->gcons->provider->sectorsize, buf, comp->gcons->provider->sectorsize); free(buf, M_GVIRSTOR); g_access(comp->gcons, 0, -1, 0); return (error); } /* * Destroy geom forcibly. */ static int g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, struct g_geom *gp) { struct g_virstor_softc *sc; int exitval; sc = gp->softc; KASSERT(sc != NULL, ("%s: NULL sc", __func__)); exitval = 0; LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name, gp->softc); if (sc != NULL) { #ifdef INVARIANTS char *buf; int error; off_t off; int isclean, count; int n; LOG_MSG(LVL_INFO, "INVARIANTS detected"); LOG_MSG(LVL_INFO, "Verifying allocation " "table for %s", sc->geom->name); count = 0; for (n = 0; n < sc->chunk_count; n++) { if (sc->map[n].flags || VIRSTOR_MAP_ALLOCATED != 0) count++; } LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks", sc->geom->name, count); n = off = count = 0; isclean = 1; if (virstor_valid_components(sc) != sc->n_components) { /* This is a incomplete virstor device (not all * components have been found) */ LOG_MSG(LVL_ERROR, "Device %s is incomplete", sc->geom->name); goto bailout; } error = g_access(sc->components[0].gcons, 1, 0, 0); KASSERT(error == 0, ("%s: g_access failed (%d)", __func__, error)); /* Compare the whole on-disk allocation table with what's * currently in memory */ while (n < sc->chunk_count) { buf = g_read_data(sc->components[0].gcons, off, sc->sectorsize, &error); KASSERT(buf != NULL, ("g_read_data returned NULL (%d) " "for read at %jd", error, off)); if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) { LOG_MSG(LVL_ERROR, "ERROR in allocation table, " "entry %d, offset %jd", n, off); isclean = 0; count++; } n += sc->me_per_sector; off += sc->sectorsize; g_free(buf); } error = g_access(sc->components[0].gcons, -1, 0, 0); KASSERT(error == 0, ("%s: g_access failed (%d) on exit", __func__, error)); if (isclean != 1) { LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s " "(%d sectors don't match, max %zu allocations)", sc->geom->name, count, count * sc->me_per_sector); } else { LOG_MSG(LVL_INFO, "Allocation table ok for %s", sc->geom->name); } bailout: #endif update_metadata(sc); virstor_geom_destroy(sc, FALSE, FALSE); exitval = EAGAIN; } else exitval = 0; return (exitval); } /* * Taste event (per-class callback) * Examines a provider and creates geom instances if needed */ static struct g_geom * g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags) { struct g_virstor_metadata md; struct g_geom *gp; struct g_consumer *cp; struct g_virstor_softc *sc; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name); /* We need a dummy geom to attach a consumer to the given 
provider */ gp = g_new_geomf(mp, "virstor:taste.helper"); gp->start = (void *)invalid_call; /* XXX: hacked up so the */ gp->access = (void *)invalid_call; /* compiler doesn't complain. */ gp->orphan = (void *)invalid_call; /* I really want these to fail. */ cp = g_new_consumer(gp); g_attach(cp, pp); error = read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0) return (NULL); if (md.md_version != G_VIRSTOR_VERSION) { LOG_MSG(LVL_ERROR, "Kernel module version invalid " "to handle %s (%s) : %d should be %d", md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION); return (NULL); } if (md.provsize != pp->mediasize) return (NULL); /* If the provider name is hardcoded, use the offered provider only * if it's been offered with its proper name (the one used in * the label command). */ if (md.provider[0] != '\0' && !g_compare_names(md.provider, pp->name)) return (NULL); /* Iterate all geoms this class already knows about to see if a new * geom instance of this class needs to be created (in case the provider * is first from a (possibly) multi-consumer geom) or it just needs * to be added to an existing instance. */ sc = NULL; gp = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(md.md_name, sc->geom->name) != 0) continue; if (md.md_id != sc->id) continue; break; } if (gp != NULL) { /* We found an existing geom instance; add to it */ LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name); error = add_provider_to_geom(sc, pp, &md); if (error != 0) { LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", pp->name, md.md_name, error); return (NULL); } } else { /* New geom instance needs to be created */ gp = create_virstor_geom(mp, &md); if (gp == NULL) { LOG_MSG(LVL_ERROR, "Error creating new instance of " "class %s: %s", mp->name, md.md_name); LOG_MSG(LVL_DEBUG, "Error creating %s at %s", md.md_name, pp->name); return (NULL); } sc = gp->softc; LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name, md.md_name); error = add_provider_to_geom(sc, pp, &md); if (error != 0) { LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", pp->name, md.md_name, error); virstor_geom_destroy(sc, TRUE, FALSE); return (NULL); } } return (gp); } /* * Destroyes consumer passed to it in arguments. Used as a callback * on g_event queue. 
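The taste handler above only accepts a provider whose metadata passes a series of gates: magic string, on-disk version, recorded provider size, and, when the label hard-codes a provider name, that name as well. A userland sketch of the same gate; the struct layout, the MY_MAGIC/MY_VERSION values and the use of plain strcmp() instead of g_compare_names() are simplifications assumed for illustration only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative constants; the real ones live with the metadata code. */
#define MY_MAGIC	"GEOM::VIRSTOR"
#define MY_VERSION	1

/* Simplified subset of struct g_virstor_metadata. */
struct md {
	char     md_magic[16];
	uint32_t md_version;
	uint64_t provsize;
	char     provider[64];	/* hardcoded provider name, may be empty */
};

/*
 * Mirror of the accept/reject decisions in g_virstor_taste(): wrong
 * magic, wrong version, a size mismatch, or a name mismatch against a
 * hardcoded provider name all cause the provider to be ignored.
 */
static bool
taste_ok(const struct md *md, const char *pp_name, uint64_t pp_mediasize)
{
	if (strcmp(md->md_magic, MY_MAGIC) != 0)
		return (false);
	if (md->md_version != MY_VERSION)
		return (false);
	if (md->provsize != pp_mediasize)
		return (false);
	if (md->provider[0] != '\0' && strcmp(md->provider, pp_name) != 0)
		return (false);
	return (true);
}

int
main(void)
{
	struct md m = {
		.md_magic = "GEOM::VIRSTOR",
		.md_version = MY_VERSION,
		.provsize = 1000000,
		.provider = "",
	};

	printf("accepted: %d\n", taste_ok(&m, "ada0p3", 1000000));
	return (0);
}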
*/ static void delay_destroy_consumer(void *arg, int flags __unused) { struct g_consumer *c = arg; KASSERT(c != NULL, ("%s: invalid consumer", __func__)); LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay", c->provider->name); g_detach(c); g_destroy_consumer(c); } /* * Remove a component (consumer) from geom instance; If it's the first * component being removed, orphan the provider to announce geom's being * dismantled */ static void remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp, boolean_t delay) { struct g_consumer *c; KASSERT(comp->gcons != NULL, ("Component with no consumer in %s", sc->geom->name)); c = comp->gcons; comp->gcons = NULL; KASSERT(c->provider != NULL, ("%s: no provider", __func__)); LOG_MSG(LVL_DEBUG, "Component %s removed from %s", c->provider->name, sc->geom->name); if (sc->provider != NULL) { LOG_MSG(LVL_INFO, "Removing provider %s", sc->provider->name); g_wither_provider(sc->provider, ENXIO); sc->provider = NULL; } if (c->acr > 0 || c->acw > 0 || c->ace > 0) g_access(c, -c->acr, -c->acw, -c->ace); if (delay) { /* Destroy consumer after it's tasted */ g_post_event(delay_destroy_consumer, c, M_WAITOK, NULL); } else { g_detach(c); g_destroy_consumer(c); } } /* * Destroy geom - called internally * See g_virstor_destroy_geom for the other one */ static int virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force, boolean_t delay) { struct g_provider *pp; struct g_geom *gp; u_int n; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { LOG_MSG(force ? LVL_WARNING : LVL_ERROR, "Device %s is still open.", pp->name); if (!force) return (EBUSY); } for (n = 0; n < sc->n_components; n++) { if (sc->components[n].gcons != NULL) remove_component(sc, &sc->components[n], delay); } gp = sc->geom; gp->softc = NULL; KASSERT(sc->provider == NULL, ("Provider still exists for %s", gp->name)); /* XXX: This might or might not work, since we're called with * the topology lock held. Also, it might panic the kernel if * the error'd BIO is in softupdates code. */ mtx_lock(&sc->delayed_bio_q_mtx); while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { struct g_virstor_bio_q *bq; bq = STAILQ_FIRST(&sc->delayed_bio_q); bq->bio->bio_error = ENOSPC; g_io_deliver(bq->bio, EIO); STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); free(bq, M_GVIRSTOR); } mtx_unlock(&sc->delayed_bio_q_mtx); mtx_destroy(&sc->delayed_bio_q_mtx); free(sc->map, M_GVIRSTOR); free(sc->components, M_GVIRSTOR); bzero(sc, sizeof *sc); free(sc, M_GVIRSTOR); pp = LIST_FIRST(&gp->provider); /* We only offer one provider */ if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) LOG_MSG(LVL_DEBUG, "Device %s destroyed", gp->name); g_wither_geom(gp, ENXIO); return (0); } /* * Utility function: read metadata & decode. Wants topology lock to be * held. */ static int read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) { struct g_provider *pp; char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); virstor_metadata_decode(buf, md); g_free(buf); return (0); } /** * Utility function: encode & write metadata. Assumes topology lock is * held. * * There is no useful way of recovering from errors in this function, * not involving panicking the kernel. 
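read_metadata() above always pulls exactly one sector from the very end of the underlying provider (mediasize - sectorsize), which is where virstor keeps its label, dropping the topology lock around the actual I/O. A userland sketch that reads the same location from a disk device with pread(2); it uses the standard FreeBSD DIOCGMEDIASIZE/DIOCGSECTORSIZE ioctls from disk(4) and decodes nothing, so it is only an illustration of where the label lives:

#include <sys/types.h>
#include <sys/disk.h>
#include <sys/ioctl.h>
#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	off_t mediasize;
	u_int sectorsize;
	char *buf;
	int fd;

	if (argc != 2)
		errx(1, "usage: %s /dev/<provider>", argv[0]);
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		err(1, "open");
	if (ioctl(fd, DIOCGMEDIASIZE, &mediasize) != 0 ||
	    ioctl(fd, DIOCGSECTORSIZE, &sectorsize) != 0)
		err(1, "ioctl");
	buf = malloc(sectorsize);
	if (buf == NULL)
		err(1, "malloc");
	/* The virstor label occupies the provider's last sector. */
	if (pread(fd, buf, sectorsize, mediasize - sectorsize) !=
	    (ssize_t)sectorsize)
		err(1, "pread");
	printf("read %u bytes of metadata at offset %jd\n",
	    sectorsize, (intmax_t)(mediasize - sectorsize));
	free(buf);
	close(fd);
	return (0);
}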
If the metadata cannot be written * the most we can do is notify the operator and hope he spots it and * replaces the broken drive. */ static void write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) { struct g_provider *pp; char *buf; int error; KASSERT(cp != NULL && md != NULL && cp->provider != NULL, ("Something's fishy in %s", __func__)); LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name); g_topology_assert(); error = g_access(cp, 0, 1, 0); if (error != 0) { LOG_MSG(LVL_ERROR, "g_access(0,1,0) failed for %s: %d", cp->provider->name, error); return; } pp = cp->provider; buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK); bzero(buf, pp->sectorsize); virstor_metadata_encode(md, buf); g_topology_unlock(); error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize); g_topology_lock(); g_access(cp, 0, -1, 0); free(buf, M_GVIRSTOR); if (error != 0) LOG_MSG(LVL_ERROR, "Error %d writing metadata to %s", error, cp->provider->name); } /* * Creates a new instance of this GEOM class, initialise softc */ static struct g_geom * create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md) { struct g_geom *gp; struct g_virstor_softc *sc; LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)", md->md_name, md->md_id); if (md->md_count < 1 || md->md_chunk_size < 1 || md->md_virsize < md->md_chunk_size) { /* This is bogus configuration, and probably means data is * somehow corrupted. Panic, maybe? */ LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s", md->md_name); return (NULL); } /* Check if it's already created */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->geom->name, md->md_name) == 0) { LOG_MSG(LVL_WARNING, "Geom %s already exists", md->md_name); if (sc->id != md->md_id) { LOG_MSG(LVL_ERROR, "Some stale or invalid components " "exist for virstor device named %s. " "You will need to all stale " "components and maybe reconfigure " "the virstor device. 
Tune " "kern.geom.virstor.debug sysctl up " "for more information.", sc->geom->name); } return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); gp->softc = NULL; /* to circumevent races that test softc */ gp->start = g_virstor_start; gp->spoiled = g_virstor_orphan; gp->orphan = g_virstor_orphan; gp->access = g_virstor_access; gp->dumpconf = g_virstor_dumpconf; sc = malloc(sizeof(*sc), M_GVIRSTOR, M_WAITOK | M_ZERO); sc->id = md->md_id; sc->n_components = md->md_count; sc->components = malloc(sizeof(struct g_virstor_component) * md->md_count, M_GVIRSTOR, M_WAITOK | M_ZERO); sc->chunk_size = md->md_chunk_size; sc->virsize = md->md_virsize; STAILQ_INIT(&sc->delayed_bio_q); mtx_init(&sc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx", "gvirstor", MTX_DEF | MTX_RECURSE); sc->geom = gp; sc->provider = NULL; /* virstor_check_and_run will create it */ gp->softc = sc; LOG_MSG(LVL_ANNOUNCE, "Device %s created", sc->geom->name); return (gp); } /* * Add provider to a GEOM class instance */ static int add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp, struct g_virstor_metadata *md) { struct g_virstor_component *component; struct g_consumer *cp, *fcp; struct g_geom *gp; int error; if (md->no >= sc->n_components) return (EINVAL); /* "Current" compontent */ component = &(sc->components[md->no]); if (component->gcons != NULL) return (EEXIST); gp = sc->geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL) { if (fcp->provider->sectorsize != pp->sectorsize) { /* TODO: this can be made to work */ LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid " "sector size (%d)", pp->name, sc->geom->name, pp->sectorsize); return (EINVAL); } if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) { /* Replicate access permissions from first "live" consumer * to the new one */ error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } } /* Bring up a new component */ cp->private = component; component->gcons = cp; component->sc = sc; component->index = md->no; component->chunk_count = md->chunk_count; component->chunk_next = md->chunk_next; component->chunk_reserved = md->chunk_reserved; component->flags = md->flags; LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name); virstor_check_and_run(sc); return (0); } /* * Check if everything's ready to create the geom provider & device entry, * create and start provider. 
* Called ultimately by .taste, from g_event thread */ static void virstor_check_and_run(struct g_virstor_softc *sc) { off_t off; size_t n, count; int index; int error; if (virstor_valid_components(sc) != sc->n_components) return; if (virstor_valid_components(sc) == 0) { /* This is actually a candidate for panic() */ LOG_MSG(LVL_ERROR, "No valid components for %s?", sc->provider->name); return; } sc->sectorsize = sc->components[0].gcons->provider->sectorsize; /* Initialise allocation map from the first consumer */ sc->chunk_count = sc->virsize / sc->chunk_size; if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) { LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes", sc->provider->name, sc->chunk_count * (off_t)sc->chunk_size); } sc->map_size = sc->chunk_count * sizeof *(sc->map); /* The following allocation is in order of 4MB - 8MB */ sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK); KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s", __func__, sc->map_size, sc->provider->name)); sc->map_sectors = sc->map_size / sc->sectorsize; count = 0; for (n = 0; n < sc->n_components; n++) count += sc->components[n].chunk_count; LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual " "(%zu KB chunks)", sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024); error = g_access(sc->components[0].gcons, 1, 0, 0); if (error != 0) { LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to " "read allocation map for %s", sc->components[0].gcons->provider->name, sc->geom->name); return; } /* Read in the allocation map */ LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name, sc->components[0].gcons->provider->name); off = count = n = 0; while (count < sc->map_size) { struct g_virstor_map_entry *mapbuf; size_t bs; bs = MIN(MAXPHYS, sc->map_size - count); if (bs % sc->sectorsize != 0) { /* Check for alignment errors */ bs = rounddown(bs, sc->sectorsize); if (bs == 0) break; LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned " "for %s on %s", sc->geom->name, sc->components[0].gcons->provider->name); } mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error); if (mapbuf == NULL) { free(sc->map, M_GVIRSTOR); LOG_MSG(LVL_ERROR, "Error reading allocation map " "for %s from %s (offset %ju) (error %d)", sc->geom->name, sc->components[0].gcons->provider->name, off, error); return; } bcopy(mapbuf, &sc->map[n], bs); off += bs; count += bs; n += bs / sizeof *(sc->map); g_free(mapbuf); } g_access(sc->components[0].gcons, -1, 0, 0); LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name); /* find first component with allocatable chunks */ index = -1; for (n = 0; n < sc->n_components; n++) { if (sc->components[n].chunk_next < sc->components[n].chunk_count) { index = n; break; } } if (index == -1) /* not found? 
set it to the last component and handle it * later */ index = sc->n_components - 1; if (index >= sc->n_components - g_virstor_component_watermark - 1) { LOG_MSG(LVL_WARNING, "Device %s running out of components " "(%d/%u: %s)", sc->geom->name, index+1, sc->n_components, sc->components[index].gcons->provider->name); } sc->curr_component = index; if (sc->components[index].chunk_next >= sc->components[index].chunk_count - g_virstor_chunk_watermark) { LOG_MSG(LVL_WARNING, "Component %s of %s is running out of free space " "(%u chunks left)", sc->components[index].gcons->provider->name, sc->geom->name, sc->components[index].chunk_count - sc->components[index].chunk_next); } sc->me_per_sector = sc->sectorsize / sizeof *(sc->map); if (sc->sectorsize % sizeof *(sc->map) != 0) { LOG_MSG(LVL_ERROR, "%s: Map entries don't fit exactly in a sector (%s)", __func__, sc->geom->name); return; } /* Recalculate allocated chunks in components & at the same time * verify map data is sane. We could trust metadata on this, but * we want to make sure. */ for (n = 0; n < sc->n_components; n++) sc->components[n].chunk_next = sc->components[n].chunk_reserved; for (n = 0; n < sc->chunk_count; n++) { if (sc->map[n].provider_no >= sc->n_components || sc->map[n].provider_chunk >= sc->components[sc->map[n].provider_no].chunk_count) { LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s", __func__, (u_int)n, sc->geom->name); LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u" " provider_chunk: %u, chunk_count: %u", __func__, sc->map[n].provider_no, sc->n_components, sc->map[n].provider_chunk, sc->components[sc->map[n].provider_no].chunk_count); return; } if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) sc->components[sc->map[n].provider_no].chunk_next++; } sc->provider = g_new_providerf(sc->geom, "virstor/%s", sc->geom->name); sc->provider->sectorsize = sc->sectorsize; sc->provider->mediasize = sc->virsize; g_error_provider(sc->provider, 0); LOG_MSG(LVL_INFO, "%s activated", sc->provider->name); LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting " "chunk %u", sc->provider->name, sc->curr_component, sc->components[sc->curr_component].chunk_next); } /* * Returns count of active providers in this geom instance */ static u_int virstor_valid_components(struct g_virstor_softc *sc) { unsigned int nc, i; nc = 0; KASSERT(sc != NULL, ("%s: softc is NULL", __func__)); KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__)); for (i = 0; i < sc->n_components; i++) if (sc->components[i].gcons != NULL) nc++; return (nc); } /* * Called when the consumer gets orphaned (?) 
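virstor_check_and_run() derives all of its map geometry from three numbers: the virtual size, the chunk size and the first component's sector size. The allocation map is chunk_count entries of sizeof(struct virstor_map_entry) bytes stored at the front of the first component, and me_per_sector entries must fit a sector exactly. A standalone sketch of that arithmetic; the 8-byte entry mirrors the uint16/uint16/uint32 layout in g_virstor.h, while the 2 TB / 4 MB / 512 B figures are only an example configuration:

#include <stdint.h>
#include <stdio.h>

/* Mirrors struct virstor_map_entry from g_virstor.h (8 bytes). */
struct map_entry {
	uint16_t flags;
	uint16_t provider_no;
	uint32_t provider_chunk;
};

int
main(void)
{
	/* Example label parameters, not taken from a real device. */
	uint64_t virsize = 2ULL << 40;		/* 2 TB virtual size */
	uint64_t chunk_size = 4 << 20;		/* 4 MB chunks */
	uint64_t sectorsize = 512;

	uint64_t chunk_count = virsize / chunk_size;
	uint64_t map_size = chunk_count * sizeof(struct map_entry);
	uint64_t map_sectors = map_size / sectorsize;
	uint64_t me_per_sector = sectorsize / sizeof(struct map_entry);

	printf("chunks:          %ju\n", (uintmax_t)chunk_count);
	printf("map size:        %ju bytes (%ju sectors)\n",
	    (uintmax_t)map_size, (uintmax_t)map_sectors);
	printf("entries/sector:  %ju\n", (uintmax_t)me_per_sector);
	/* check_and_run() refuses to run unless this divides evenly. */
	printf("sector aligned:  %s\n",
	    sectorsize % sizeof(struct map_entry) == 0 ? "yes" : "no");
	return (0);
}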
*/ static void g_virstor_orphan(struct g_consumer *cp) { struct g_virstor_softc *sc; struct g_virstor_component *comp; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; comp = cp->private; KASSERT(comp != NULL, ("%s: No component in private part of consumer", __func__)); remove_component(sc, comp, FALSE); if (virstor_valid_components(sc) == 0) virstor_geom_destroy(sc, TRUE, FALSE); } /* * Called to notify geom when it's been opened, and for what intent */ static int g_virstor_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *c; struct g_virstor_softc *sc; struct g_geom *gp; int error; KASSERT(pp != NULL, ("%s: NULL provider", __func__)); gp = pp->geom; KASSERT(gp != NULL, ("%s: NULL geom", __func__)); sc = gp->softc; if (sc == NULL) { /* It seems that .access can be called with negative dr,dw,dx * in this case but I want to check for myself */ LOG_MSG(LVL_WARNING, "access(%d, %d, %d) for %s", dr, dw, de, pp->name); /* This should only happen when geom is withered so * allow only negative requests */ KASSERT(dr <= 0 && dw <= 0 && de <= 0, ("%s: Positive access for %s", __func__, pp->name)); if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) LOG_MSG(LVL_DEBUG, "Device %s definitely destroyed", pp->name); return (0); } /* Grab an exclusive bit to propagate on our consumers on first open */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... drop it on close */ if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) { de--; update_metadata(sc); /* Writes statistical information */ } error = ENXIO; LIST_FOREACH(c, &gp->consumer, consumer) { KASSERT(c != NULL, ("%s: consumer is NULL", __func__)); error = g_access(c, dr, dw, de); if (error != 0) { struct g_consumer *c2; /* Backout earlier changes */ LIST_FOREACH(c2, &gp->consumer, consumer) { if (c2 == c) /* all eariler components fixed */ return (error); g_access(c2, -dr, -dw, -de); } } } return (error); } /* * Generate XML dump of current state */ static void g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_virstor_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL || pp != NULL) return; if (cp != NULL) { /* For each component */ struct g_virstor_component *comp; comp = cp->private; if (comp == NULL) return; sbuf_printf(sb, "%s%u\n", indent, comp->index); sbuf_printf(sb, "%s%u\n", indent, comp->chunk_count); sbuf_printf(sb, "%s%u\n", indent, comp->chunk_next); sbuf_printf(sb, "%s%u\n", indent, comp->chunk_reserved); sbuf_printf(sb, "%s%u%%\n", indent, comp->chunk_next > 0 ? 100 - ((comp->chunk_next + comp->chunk_reserved) * 100) / comp->chunk_count : 100); } else { /* For the whole thing */ u_int count, used, i; off_t size; count = used = size = 0; for (i = 0; i < sc->n_components; i++) { if (sc->components[i].gcons != NULL) { count += sc->components[i].chunk_count; used += sc->components[i].chunk_next + sc->components[i].chunk_reserved; size += sc->components[i].gcons-> provider->mediasize; } } sbuf_printf(sb, "%s" "Components=%u, Online=%u\n", indent, sc->n_components, virstor_valid_components(sc)); sbuf_printf(sb, "%s%u%% physical free\n", indent, 100-(used * 100) / count); sbuf_printf(sb, "%s%zu\n", indent, sc->chunk_size); sbuf_printf(sb, "%s%u%%\n", indent, used > 0 ? 
100 - (used * 100) / count : 100); sbuf_printf(sb, "%s%u\n", indent, count); sbuf_printf(sb, "%s%zu\n", indent, sc->chunk_count); sbuf_printf(sb, "%s%zu%%\n", indent, (count * 100) / sc->chunk_count); sbuf_printf(sb, "%s%jd\n", indent, size); sbuf_printf(sb, "%s%jd\n", indent, sc->virsize); } } /* * GEOM .done handler * Can't use standard handler because one requested IO may * fork into additional data IOs */ static void g_virstor_done(struct bio *b) { struct g_virstor_softc *sc; struct bio *parent_b; parent_b = b->bio_parent; sc = parent_b->bio_to->geom->softc; if (b->bio_error != 0) { LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s", b->bio_error, b->bio_offset, b->bio_length, b->bio_to->name); if (parent_b->bio_error == 0) parent_b->bio_error = b->bio_error; } parent_b->bio_inbed++; parent_b->bio_completed += b->bio_completed; if (parent_b->bio_children == parent_b->bio_inbed) { parent_b->bio_completed = parent_b->bio_length; g_io_deliver(parent_b, parent_b->bio_error); } g_destroy_bio(b); } /* * I/O starts here * Called in g_down thread */ static void g_virstor_start(struct bio *b) { struct g_virstor_softc *sc; struct g_virstor_component *comp; struct bio *cb; struct g_provider *pp; char *addr; off_t offset, length; struct bio_queue_head bq; size_t chunk_size; /* cached for convenience */ u_int count; pp = b->bio_to; sc = pp->geom->softc; KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__, b->bio_to->error, b->bio_to->name)); LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__); switch (b->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; default: g_io_deliver(b, EOPNOTSUPP); return; } LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length); bioq_init(&bq); chunk_size = sc->chunk_size; addr = b->bio_data; offset = b->bio_offset; /* virtual offset and length */ length = b->bio_length; while (length > 0) { size_t chunk_index, in_chunk_offset, in_chunk_length; struct virstor_map_entry *me; chunk_index = offset / chunk_size; /* round downwards */ in_chunk_offset = offset % chunk_size; in_chunk_length = min(length, chunk_size - in_chunk_offset); LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)", b->bio_cmd == BIO_READ ? "R" : "W", offset, length, chunk_index, in_chunk_offset, in_chunk_length); me = &sc->map[chunk_index]; if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) { if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { /* Reads from unallocated chunks return zeroed * buffers */ if (b->bio_cmd == BIO_READ) bzero(addr, in_chunk_length); } else { comp = &sc->components[me->provider_no]; cb = g_clone_bio(b); if (cb == NULL) { bioq_dismantle(&bq); if (b->bio_error == 0) b->bio_error = ENOMEM; g_io_deliver(b, b->bio_error); return; } cb->bio_to = comp->gcons->provider; cb->bio_done = g_virstor_done; cb->bio_offset = (off_t)me->provider_chunk * (off_t)chunk_size + in_chunk_offset; cb->bio_length = in_chunk_length; cb->bio_data = addr; cb->bio_caller1 = comp; bioq_disksort(&bq, cb); } } else { /* handle BIO_WRITE */ KASSERT(b->bio_cmd == BIO_WRITE, ("%s: Unknown command %d", __func__, b->bio_cmd)); if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { /* We have a virtual chunk, represented by * the "me" entry, but it's not yet allocated * (tied to) a physical chunk. So do it now. */ struct virstor_map_entry *data_me; u_int phys_chunk, comp_no; off_t s_offset; int error; error = allocate_chunk(sc, &comp, &comp_no, &phys_chunk); if (error != 0) { /* We cannot allocate a physical chunk * to satisfy this request, so we'll * delay it to when we can... 
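The heart of g_virstor_start() is the loop that walks a virtual (offset, length) range and slices it into per-chunk segments: chunk_index selects the map entry, in_chunk_offset is the remainder within that chunk, and in_chunk_length never crosses a chunk boundary. A userland sketch of just that slicing, independent of any GEOM structures:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

/*
 * Walk [offset, offset + length) in chunk_size units, printing the
 * same (chunk_index, in_chunk_offset, in_chunk_length) triples the
 * BIO-splitting loop computes before cloning each request.
 */
static void
split_request(uint64_t offset, uint64_t length, uint64_t chunk_size)
{
	while (length > 0) {
		uint64_t chunk_index = offset / chunk_size;
		uint64_t in_chunk_offset = offset % chunk_size;
		uint64_t in_chunk_length =
		    MIN(length, chunk_size - in_chunk_offset);

		printf("chunk %ju, offset %ju, length %ju\n",
		    (uintmax_t)chunk_index, (uintmax_t)in_chunk_offset,
		    (uintmax_t)in_chunk_length);
		offset += in_chunk_length;
		length -= in_chunk_length;
	}
}

int
main(void)
{
	/* A 10 KB request that straddles 4 KB chunk boundaries. */
	split_request(3072, 10240, 4096);
	return (0);
}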
* XXX: this will prevent the fs from * being umounted! */ struct g_virstor_bio_q *biq; biq = malloc(sizeof *biq, M_GVIRSTOR, M_NOWAIT); if (biq == NULL) { bioq_dismantle(&bq); if (b->bio_error == 0) b->bio_error = ENOMEM; g_io_deliver(b, b->bio_error); return; } biq->bio = b; mtx_lock(&sc->delayed_bio_q_mtx); STAILQ_INSERT_TAIL(&sc->delayed_bio_q, biq, linkage); mtx_unlock(&sc->delayed_bio_q_mtx); LOG_MSG(LVL_WARNING, "Delaying BIO " "(size=%ju) until free physical " "space can be found on %s", b->bio_length, sc->provider->name); return; } LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s " "for %s", phys_chunk, comp->gcons->provider->name, sc->provider->name); me->provider_no = comp_no; me->provider_chunk = phys_chunk; me->flags |= VIRSTOR_MAP_ALLOCATED; cb = g_clone_bio(b); if (cb == NULL) { me->flags &= ~VIRSTOR_MAP_ALLOCATED; me->provider_no = 0; me->provider_chunk = 0; bioq_dismantle(&bq); if (b->bio_error == 0) b->bio_error = ENOMEM; g_io_deliver(b, b->bio_error); return; } /* The allocation table is stored continuously * at the start of the drive. We need to * calculate the offset of the sector that holds * this map entry both on the drive and in the * map array. * sc_offset will end up pointing to the drive * sector. */ s_offset = chunk_index * sizeof *me; s_offset = rounddown(s_offset, sc->sectorsize); /* data_me points to map entry sector * in memory (analogous to offset) */ data_me = &sc->map[rounddown(chunk_index, sc->me_per_sector)]; /* Commit sector with map entry to storage */ cb->bio_to = sc->components[0].gcons->provider; cb->bio_done = g_virstor_done; cb->bio_offset = s_offset; cb->bio_data = (char *)data_me; cb->bio_length = sc->sectorsize; cb->bio_caller1 = &sc->components[0]; bioq_disksort(&bq, cb); } comp = &sc->components[me->provider_no]; cb = g_clone_bio(b); if (cb == NULL) { bioq_dismantle(&bq); if (b->bio_error == 0) b->bio_error = ENOMEM; g_io_deliver(b, b->bio_error); return; } /* Finally, handle the data */ cb->bio_to = comp->gcons->provider; cb->bio_done = g_virstor_done; cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size + in_chunk_offset; cb->bio_length = in_chunk_length; cb->bio_data = addr; cb->bio_caller1 = comp; bioq_disksort(&bq, cb); } addr += in_chunk_length; length -= in_chunk_length; offset += in_chunk_length; } /* Fire off bio's here */ count = 0; for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) { bioq_remove(&bq, cb); LOG_REQ(LVL_MOREDEBUG, cb, "Firing request"); comp = cb->bio_caller1; cb->bio_caller1 = NULL; LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju", cb->bio_offset, cb->bio_length); g_io_request(cb, comp->gcons); count++; } if (count == 0) { /* We handled everything locally */ b->bio_completed = b->bio_length; g_io_deliver(b, 0); } } /* * Allocate a chunk from a physical provider. Returns physical component, * chunk index relative to the component and the component's index. */ static int allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp, u_int *comp_no_p, u_int *chunk) { u_int comp_no; KASSERT(sc->curr_component < sc->n_components, ("%s: Invalid curr_component: %u", __func__, sc->curr_component)); comp_no = sc->curr_component; *comp = &sc->components[comp_no]; dump_component(*comp); if ((*comp)->chunk_next >= (*comp)->chunk_count) { /* This component is full. 
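When a write allocates a new chunk, the corresponding map entry has to be persisted as well, so the code above locates both the on-disk sector that contains the entry (s_offset, chunk_index * sizeof(map entry) rounded down to a sector boundary) and the matching sector-aligned slice of the in-memory map (chunk_index rounded down to a multiple of me_per_sector). A small sketch of those two rounddown computations, reusing the 8-byte entry and 512-byte sector from the earlier sketch; the chunk index is arbitrary:

#include <stdint.h>
#include <stdio.h>

#define ROUNDDOWN(x, y)	(((x) / (y)) * (y))

int
main(void)
{
	uint64_t sectorsize = 512;
	uint64_t entry_size = 8;	/* sizeof(struct virstor_map_entry) */
	uint64_t me_per_sector = sectorsize / entry_size;	/* 64 */
	uint64_t chunk_index = 100000;	/* arbitrary example chunk */

	/* Byte offset of the map sector holding this entry, counted
	 * from the start of the first component where the map lives. */
	uint64_t s_offset = ROUNDDOWN(chunk_index * entry_size, sectorsize);
	/* Index of the first map entry in that same sector; the write
	 * uses &map[first_in_sector] as its data pointer. */
	uint64_t first_in_sector = ROUNDDOWN(chunk_index, me_per_sector);

	printf("map entry %ju lives in the sector at byte offset %ju,\n"
	    "which starts with map entry %ju\n",
	    (uintmax_t)chunk_index, (uintmax_t)s_offset,
	    (uintmax_t)first_in_sector);
	return (0);
}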
Allocate next component */ if (comp_no >= sc->n_components-1) { LOG_MSG(LVL_ERROR, "All physical space allocated for %s", sc->geom->name); return (-1); } (*comp)->flags &= ~VIRSTOR_PROVIDER_CURRENT; sc->curr_component = ++comp_no; *comp = &sc->components[comp_no]; if (comp_no >= sc->n_components - g_virstor_component_watermark-1) LOG_MSG(LVL_WARNING, "Device %s running out of components " "(switching to %u/%u: %s)", sc->geom->name, comp_no+1, sc->n_components, (*comp)->gcons->provider->name); /* Take care not to overwrite reserved chunks */ if ( (*comp)->chunk_reserved > 0 && (*comp)->chunk_next < (*comp)->chunk_reserved) (*comp)->chunk_next = (*comp)->chunk_reserved; (*comp)->flags |= VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT; dump_component(*comp); *comp_no_p = comp_no; *chunk = (*comp)->chunk_next++; } else { *comp_no_p = comp_no; *chunk = (*comp)->chunk_next++; } return (0); } /* Dump a component */ static void dump_component(struct g_virstor_component *comp) { if (g_virstor_debug < LVL_DEBUG2) return; printf("Component %d: %s\n", comp->index, comp->gcons->provider->name); printf(" chunk_count: %u\n", comp->chunk_count); printf(" chunk_next: %u\n", comp->chunk_next); printf(" flags: %u\n", comp->flags); } #if 0 /* Dump a map entry */ static void dump_me(struct virstor_map_entry *me, unsigned int nr) { if (g_virstor_debug < LVL_DEBUG) return; printf("VIRT. CHUNK #%d: ", nr); if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) printf("(unallocated)\n"); else printf("allocated at provider %u, provider_chunk %u\n", me->provider_no, me->provider_chunk); } #endif /* * Dismantle bio_queue and destroy its components */ static void bioq_dismantle(struct bio_queue_head *bq) { struct bio *b; for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) { bioq_remove(bq, b); g_destroy_bio(b); } } /* * The function that shouldn't be called. * When this is called, the stack is already garbled because of * argument mismatch. There's nothing to do now but panic, which is * accidentally the whole purpose of this function. * Motivation: to guard from accidentally calling geom methods when * they shouldn't be called. (see g_..._taste) */ static void invalid_call(void) { panic("invalid_call() has just been called. Something's fishy here."); } DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light */ MODULE_VERSION(geom_virstor, 0); Index: head/sys/geom/virstor/g_virstor.h =================================================================== --- head/sys/geom/virstor/g_virstor.h (revision 350693) +++ head/sys/geom/virstor/g_virstor.h (revision 350694) @@ -1,137 +1,119 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006-2007 Ivan Voras * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_VIRSTOR_H_ #define _G_VIRSTOR_H_ #define G_VIRSTOR_CLASS_NAME "VIRSTOR" #define VIRSTOR_MAP_ALLOCATED 1 struct virstor_map_entry { uint16_t flags; uint16_t provider_no; uint32_t provider_chunk; }; #define VIRSTOR_MAP_ENTRY_SIZE (sizeof(struct virstor_map_entry)) #define VIRSTOR_MAP_BLOCK_ENTRIES (MAXPHYS / VIRSTOR_MAP_ENTRY_SIZE) /* Struct size is guarded by CTASSERT in main source */ #ifdef _KERNEL -#define LOG_MSG(lvl, ...) do { \ - if (g_virstor_debug >= (lvl)) { \ - printf("GEOM_" G_VIRSTOR_CLASS_NAME); \ - if ((lvl) > 0) \ - printf("[%u]", (lvl)); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - } \ -} while (0) +#define LOG_MSG(lvl, ...) \ + _GEOM_DEBUG("GEOM_VIRSTOR", g_virstor_debug, (lvl), NULL, __VA_ARGS__) #define LOG_MESSAGE LOG_MSG -#define LOG_REQ(lvl, bp, ...) do { \ - if (g_virstor_debug >= (lvl)) { \ - printf("GEOM_" G_VIRSTOR_CLASS_NAME); \ - if ((lvl) > 0) \ - printf("[%u]", (lvl)); \ - printf(": "); \ - printf(__VA_ARGS__); \ - printf(" "); \ - g_print_bio(bp); \ - printf("\n"); \ - } \ -} while (0) +#define LOG_REQ(lvl, bp, ...) \ + _GEOM_DEBUG("GEOM_VIRSTOR", g_virstor_debug, (lvl), (bp), __VA_ARGS__) #define LOG_REQUEST LOG_REQ /* "critical" system announcements (e.g. "geom is up") */ #define LVL_ANNOUNCE 0 /* errors */ #define LVL_ERROR 1 /* warnings */ #define LVL_WARNING 2 /* info, noncritical for system operation (user doesn't have to see it */ #define LVL_INFO 5 /* debug info */ #define LVL_DEBUG 10 /* more debug info */ #define LVL_DEBUG2 12 /* superfluous debug info (large volumes of data) */ #define LVL_MOREDEBUG 15 /* Component data */ struct g_virstor_component { struct g_consumer *gcons; struct g_virstor_softc *sc; unsigned int index; /* Component index in array */ unsigned int chunk_count; unsigned int chunk_next; unsigned int chunk_reserved; unsigned int flags; }; /* Internal geom instance data */ struct g_virstor_softc { struct g_geom *geom; struct g_provider *provider; struct g_virstor_component *components; u_int n_components; u_int curr_component; /* Component currently used */ uint32_t id; /* Unique ID of this geom */ off_t virsize; /* Total size of virstor */ off_t sectorsize; size_t chunk_size; size_t chunk_count; /* governs map_size */ struct virstor_map_entry *map; size_t map_size; /* (in bytes) */ size_t map_sectors; /* Size of map in sectors */ size_t me_per_sector; /* # map entries in a sector */ STAILQ_HEAD(, g_virstor_bio_q) delayed_bio_q; /* Queue of delayed BIOs */ struct mtx delayed_bio_q_mtx; }; /* "delayed BIOs" Queue element */ struct g_virstor_bio_q { struct bio *bio; STAILQ_ENTRY(g_virstor_bio_q) linkage; }; #endif /* _KERNEL */ #ifndef _PATH_DEV #define _PATH_DEV "/dev/" #endif #endif /* !_G_VIRSTOR_H_ */
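The header ends with the delayed-BIO queue used by the write path: requests that cannot be given a physical chunk are parked on an STAILQ and either re-issued later or failed when the geom is destroyed. A minimal userland sketch of that queue pattern with the same <sys/queue.h> macros; the mutex that protects the real queue is omitted, and struct bio is replaced by a plain integer:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

/* Userland stand-in for the struct g_virstor_bio_q element above. */
struct delayed_req {
	int id;			/* stands in for the struct bio pointer */
	STAILQ_ENTRY(delayed_req) linkage;
};

STAILQ_HEAD(req_head, delayed_req);

int
main(void)
{
	struct req_head q = STAILQ_HEAD_INITIALIZER(q);
	struct delayed_req *r;

	/* Park two "requests" that could not be served right away,
	 * the way g_virstor_start() parks BIOs when no free physical
	 * chunk exists. */
	for (int i = 0; i < 2; i++) {
		r = malloc(sizeof(*r));
		if (r == NULL)
			abort();
		r->id = i;
		STAILQ_INSERT_TAIL(&q, r, linkage);
	}

	/* Drain the queue, as virstor_geom_destroy() does on teardown
	 * (there each BIO is failed; here we just report them). */
	while (!STAILQ_EMPTY(&q)) {
		r = STAILQ_FIRST(&q);
		STAILQ_REMOVE_HEAD(&q, linkage);
		printf("completing delayed request %d\n", r->id);
		free(r);
	}
	return (0);
}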