Index: projects/clang391-import/bin/kenv/kenv.c =================================================================== --- projects/clang391-import/bin/kenv/kenv.c (revision 309262) +++ projects/clang391-import/bin/kenv/kenv.c (revision 309263) @@ -1,206 +1,206 @@ /*- * Copyright (c) 2000 Peter Wemm * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include static void usage(void); static int kdumpenv(void); static int kgetenv(const char *); static int ksetenv(const char *, char *); static int kunsetenv(const char *); static int hflag = 0; static int Nflag = 0; static int qflag = 0; static int uflag = 0; static int vflag = 0; static void usage(void) { (void)fprintf(stderr, "%s\n%s\n%s\n", "usage: kenv [-hNq]", " kenv [-qv] variable[=value]", " kenv [-q] -u variable"); exit(1); } int main(int argc, char **argv) { char *env, *eq, *val; int ch, error; - error = 0; val = NULL; env = NULL; while ((ch = getopt(argc, argv, "hNquv")) != -1) { switch (ch) { case 'h': hflag++; break; case 'N': Nflag++; break; case 'q': qflag++; break; case 'u': uflag++; break; case 'v': vflag++; break; default: usage(); } } argc -= optind; argv += optind; if (argc > 0) { env = argv[0]; eq = strchr(env, '='); if (eq != NULL) { *eq++ = '\0'; val = eq; } argv++; argc--; } if ((hflag || Nflag) && env != NULL) usage(); if (argc > 0 || ((uflag || vflag) && env == NULL)) usage(); if (env == NULL) { error = kdumpenv(); if (error && !qflag) warn("kdumpenv"); } else if (val == NULL) { if (uflag) { error = kunsetenv(env); if (error && !qflag) warnx("unable to unset %s", env); } else { error = kgetenv(env); if (error && !qflag) warnx("unable to get %s", env); } } else { error = ksetenv(env, val); if (error && !qflag) warnx("unable to set %s to %s", env, val); } return (error); } static int kdumpenv(void) { - char *buf, *cp; + char *buf, *bp, *cp; int buflen, envlen; envlen = kenv(KENV_DUMP, NULL, NULL, 0); if (envlen < 0) return (-1); for (;;) { buflen = envlen * 120 / 100; - buf = malloc(buflen + 1); + buf = calloc(1, buflen + 1); if (buf == NULL) return (-1); - memset(buf, 0, buflen + 1); /* Be defensive */ envlen = kenv(KENV_DUMP, NULL, buf, buflen); if (envlen < 0) { free(buf); return (-1); } if (envlen > buflen) free(buf); else break; } - for (; 
*buf != '\0'; buf += strlen(buf) + 1) { + for (bp = buf; *bp != '\0'; bp += strlen(bp) + 1) { if (hflag) { - if (strncmp(buf, "hint.", 5) != 0) + if (strncmp(bp, "hint.", 5) != 0) continue; } - cp = strchr(buf, '='); + cp = strchr(bp, '='); if (cp == NULL) continue; *cp++ = '\0'; if (Nflag) - printf("%s\n", buf); + printf("%s\n", bp); else - printf("%s=\"%s\"\n", buf, cp); - buf = cp; + printf("%s=\"%s\"\n", bp, cp); + bp = cp; } + + free(buf); return (0); } static int kgetenv(const char *env) { char buf[1024]; int ret; ret = kenv(KENV_GET, env, buf, sizeof(buf)); if (ret == -1) return (ret); if (vflag) printf("%s=\"%s\"\n", env, buf); else printf("%s\n", buf); return (0); } static int ksetenv(const char *env, char *val) { int ret; - ret = kenv(KENV_SET, env, val, strlen(val)+1); + ret = kenv(KENV_SET, env, val, strlen(val) + 1); if (ret == 0) printf("%s=\"%s\"\n", env, val); return (ret); } static int kunsetenv(const char *env) { int ret; - + ret = kenv(KENV_UNSET, env, NULL, 0); return (ret); } Index: projects/clang391-import/release/packages/clang.ucl =================================================================== --- projects/clang391-import/release/packages/clang.ucl (revision 309262) +++ projects/clang391-import/release/packages/clang.ucl (revision 309263) @@ -1,24 +1,24 @@ # # $FreeBSD$ # name = "FreeBSD-%PKGNAME%" origin = "base" version = "%VERSION%" comment = "%COMMENT%" categories = [ base ] maintainer = "re@FreeBSD.org" www = "https://www.FreeBSD.org" prefix = "/" licenselogic = "single" -licenses = [ BSD2CLAUSE ] +licenses = [ NCSA ] desc = <> CTR_DLINE_SHIFT) #define CTR_ILINE_SHIFT 0 #define CTR_ILINE_MASK (0xf << CTR_ILINE_SHIFT) #define CTR_ILINE_SIZE(reg) (((reg) & CTR_ILINE_MASK) >> CTR_ILINE_SHIFT) /* DCZID_EL0 - Data Cache Zero ID register */ #define DCZID_DZP (1 << 4) /* DC ZVA prohibited if non-0 */ #define DCZID_BS_SHIFT 0 #define DCZID_BS_MASK (0xf << DCZID_BS_SHIFT) #define DCZID_BS_SIZE(reg) (((reg) & DCZID_BS_MASK) >> DCZID_BS_SHIFT) /* ESR_ELx */ #define ESR_ELx_ISS_MASK 0x00ffffff #define ISS_INSN_FnV (0x01 << 10) #define ISS_INSN_EA (0x01 << 9) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_INSN_IFSC_MASK (0x1f << 0) #define ISS_DATA_ISV (0x01 << 24) #define ISS_DATA_SAS_MASK (0x03 << 22) #define ISS_DATA_SSE (0x01 << 21) #define ISS_DATA_SRT_MASK (0x1f << 16) #define ISS_DATA_SF (0x01 << 15) #define ISS_DATA_AR (0x01 << 14) #define ISS_DATA_FnV (0x01 << 10) #define ISS_DATa_EA (0x01 << 9) #define ISS_DATa_CM (0x01 << 8) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_DATa_WnR (0x01 << 6) -#define ISS_DATA_DFSC_MASK (0x1f << 0) +#define ISS_DATA_DFSC_MASK (0x3f << 0) #define ISS_DATA_DFSC_ASF_L0 (0x00 << 0) #define ISS_DATA_DFSC_ASF_L1 (0x01 << 0) #define ISS_DATA_DFSC_ASF_L2 (0x02 << 0) #define ISS_DATA_DFSC_ASF_L3 (0x03 << 0) #define ISS_DATA_DFSC_TF_L0 (0x04 << 0) #define ISS_DATA_DFSC_TF_L1 (0x05 << 0) #define ISS_DATA_DFSC_TF_L2 (0x06 << 0) #define ISS_DATA_DFSC_TF_L3 (0x07 << 0) #define ISS_DATA_DFSC_AFF_L1 (0x09 << 0) #define ISS_DATA_DFSC_AFF_L2 (0x0a << 0) #define ISS_DATA_DFSC_AFF_L3 (0x0b << 0) #define ISS_DATA_DFSC_PF_L1 (0x0d << 0) #define ISS_DATA_DFSC_PF_L2 (0x0e << 0) #define ISS_DATA_DFSC_PF_L3 (0x0f << 0) #define ISS_DATA_DFSC_EXT (0x10 << 0) #define ISS_DATA_DFSC_EXT_L0 (0x14 << 0) #define ISS_DATA_DFSC_EXT_L1 (0x15 << 0) #define ISS_DATA_DFSC_EXT_L2 (0x16 << 0) #define ISS_DATA_DFSC_EXT_L3 (0x17 << 0) #define ISS_DATA_DFSC_ECC (0x18 << 0) #define ISS_DATA_DFSC_ECC_L0 (0x1c << 0) #define ISS_DATA_DFSC_ECC_L1 (0x1d << 0) #define 
ISS_DATA_DFSC_ECC_L2 (0x1e << 0) #define ISS_DATA_DFSC_ECC_L3 (0x1f << 0) #define ISS_DATA_DFSC_ALIGN (0x21 << 0) #define ISS_DATA_DFSC_TLB_CONFLICT (0x30 << 0) #define ESR_ELx_IL (0x01 << 25) #define ESR_ELx_EC_SHIFT 26 #define ESR_ELx_EC_MASK (0x3f << 26) #define ESR_ELx_EXCEPTION(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define EXCP_UNKNOWN 0x00 /* Unknown exception */ #define EXCP_FP_SIMD 0x07 /* VFP/SIMD trap */ #define EXCP_ILL_STATE 0x0e /* Illegal execution state */ #define EXCP_SVC 0x15 /* SVC trap */ #define EXCP_MSR 0x18 /* MSR/MRS trap */ #define EXCP_INSN_ABORT_L 0x20 /* Instruction abort, from lower EL */ #define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */ #define EXCP_PC_ALIGN 0x22 /* PC alignment fault */ #define EXCP_DATA_ABORT_L 0x24 /* Data abort, from lower EL */ #define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */ #define EXCP_SP_ALIGN 0x26 /* SP alignment fault */ #define EXCP_TRAP_FP 0x2c /* Trapped FP exception */ #define EXCP_SERROR 0x2f /* SError interrupt */ #define EXCP_SOFTSTP_EL0 0x32 /* Software Step, from lower EL */ #define EXCP_SOFTSTP_EL1 0x33 /* Software Step, from same EL */ #define EXCP_WATCHPT_EL1 0x35 /* Watchpoint, from same EL */ #define EXCP_BRK 0x3c /* Breakpoint */ /* ICC_CTLR_EL1 */ #define ICC_CTLR_EL1_EOIMODE (1U << 1) /* ICC_IAR1_EL1 */ #define ICC_IAR1_EL1_SPUR (0x03ff) /* ICC_IGRPEN0_EL1 */ #define ICC_IGRPEN0_EL1_EN (1U << 0) /* ICC_PMR_EL1 */ #define ICC_PMR_EL1_PRIO_MASK (0xFFUL) /* ICC_SGI1R_EL1 */ #define ICC_SGI1R_EL1_TL_MASK 0xffffUL #define ICC_SGI1R_EL1_AFF1_SHIFT 16 #define ICC_SGI1R_EL1_SGIID_SHIFT 24 #define ICC_SGI1R_EL1_AFF2_SHIFT 32 #define ICC_SGI1R_EL1_AFF3_SHIFT 48 #define ICC_SGI1R_EL1_SGIID_MASK 0xfUL #define ICC_SGI1R_EL1_IRM (0x1UL << 40) /* ICC_SRE_EL1 */ #define ICC_SRE_EL1_SRE (1U << 0) /* ICC_SRE_EL2 */ #define ICC_SRE_EL2_SRE (1U << 0) #define ICC_SRE_EL2_EN (1U << 3) /* ID_AA64DFR0_EL1 */ #define ID_AA64DFR0_MASK 0xf0f0ffff #define ID_AA64DFR0_DEBUG_VER_SHIFT 0 #define ID_AA64DFR0_DEBUG_VER_MASK (0xf << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_DEBUG_VER(x) ((x) & ID_AA64DFR0_DEBUG_VER_MASK) #define ID_AA64DFR0_DEBUG_VER_8 (0x6 << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_DEBUG_VER_8_VHE (0x7 << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER_SHIFT 4 #define ID_AA64DFR0_TRACE_VER_MASK (0xf << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER(x) ((x) & ID_AA64DFR0_TRACE_VER_MASK) #define ID_AA64DFR0_TRACE_VER_NONE (0x0 << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER_IMPL (0x1 << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_SHIFT 8 #define ID_AA64DFR0_PMU_VER_MASK (0xf << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER(x) ((x) & ID_AA64DFR0_PMU_VER_MASK) #define ID_AA64DFR0_PMU_VER_NONE (0x0 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_3 (0x1 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_3_1 (0x4 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_IMPL (0xf << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_BRPS_SHIFT 12 #define ID_AA64DFR0_BRPS_MASK (0xf << ID_AA64DFR0_BRPS_SHIFT) #define ID_AA64DFR0_BRPS(x) \ ((((x) >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_WRPS_SHIFT 20 #define ID_AA64DFR0_WRPS_MASK (0xf << ID_AA64DFR0_WRPS_SHIFT) #define ID_AA64DFR0_WRPS(x) \ ((((x) >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_CTX_CMPS_MASK (0xf << ID_AA64DFR0_CTX_CMPS_SHIFT) #define ID_AA64DFR0_CTX_CMPS(x) \ ((((x) >>
ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) + 1) /* ID_AA64ISAR0_EL1 */ #define ID_AA64ISAR0_MASK 0xf0fffff0 #define ID_AA64ISAR0_AES_SHIFT 4 #define ID_AA64ISAR0_AES_MASK (0xf << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES(x) ((x) & ID_AA64ISAR0_AES_MASK) #define ID_AA64ISAR0_AES_NONE (0x0 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_BASE (0x1 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_PMULL (0x2 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_SHA1_MASK (0xf << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1(x) ((x) & ID_AA64ISAR0_SHA1_MASK) #define ID_AA64ISAR0_SHA1_NONE (0x0 << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1_BASE (0x1 << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA2_SHIFT 12 #define ID_AA64ISAR0_SHA2_MASK (0xf << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2(x) ((x) & ID_AA64ISAR0_SHA2_MASK) #define ID_AA64ISAR0_SHA2_NONE (0x0 << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2_BASE (0x1 << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_CRC32_SHIFT 16 #define ID_AA64ISAR0_CRC32_MASK (0xf << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32(x) ((x) & ID_AA64ISAR0_CRC32_MASK) #define ID_AA64ISAR0_CRC32_NONE (0x0 << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32_BASE (0x1 << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_ATOMIC_SHIFT 20 #define ID_AA64ISAR0_ATOMIC_MASK (0xf << ID_AA64ISAR0_ATOMIC_SHIFT) #define ID_AA64ISAR0_ATOMIC(x) ((x) & ID_AA64ISAR0_ATOMIC_MASK) #define ID_AA64ISAR0_ATOMIC_NONE (0x0 << ID_AA64ISAR0_ATOMIC_SHIFT) #define ID_AA64ISAR0_ATOMIC_IMPL (0x2 << ID_AA64ISAR0_ATOMIC_SHIFT) #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_RDM_MASK (0xf << ID_AA64ISAR0_RDM_SHIFT) #define ID_AA64ISAR0_RDM(x) ((x) & ID_AA64ISAR0_RDM_MASK) #define ID_AA64ISAR0_RDM_NONE (0x0 << ID_AA64ISAR0_RDM_SHIFT) #define ID_AA64ISAR0_RDM_IMPL (0x1 << ID_AA64ISAR0_RDM_SHIFT) /* ID_AA64MMFR0_EL1 */ #define ID_AA64MMFR0_MASK 0xffffffff #define ID_AA64MMFR0_PA_RANGE_SHIFT 0 #define ID_AA64MMFR0_PA_RANGE_MASK (0xf << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE(x) ((x) & ID_AA64MMFR0_PA_RANGE_MASK) #define ID_AA64MMFR0_PA_RANGE_4G (0x0 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_64G (0x1 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_1T (0x2 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_4T (0x3 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_16T (0x4 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_256T (0x5 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_ASID_BITS_SHIFT 4 #define ID_AA64MMFR0_ASID_BITS_MASK (0xf << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_ASID_BITS(x) ((x) & ID_AA64MMFR0_ASID_BITS_MASK) #define ID_AA64MMFR0_ASID_BITS_8 (0x0 << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_ASID_BITS_16 (0x2 << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_BIGEND_SHIFT 8 #define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_BIGEND(x) ((x) & ID_AA64MMFR0_BIGEND_MASK) #define ID_AA64MMFR0_BIGEND_FIXED (0x0 << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_BIGEND_MIXED (0x1 << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_S_NS_MEM_SHIFT 12 #define ID_AA64MMFR0_S_NS_MEM_MASK (0xf << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_S_NS_MEM(x) ((x) & ID_AA64MMFR0_S_NS_MEM_MASK) #define ID_AA64MMFR0_S_NS_MEM_NONE (0x0 << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_S_NS_MEM_DISTINCT (0x1 << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0_SHIFT 16 
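/*
 * A minimal usage sketch for the ID-register field macros above
 * (hypothetical helper, not part of this change; dfr0/isar0 are
 * assumed to hold the raw register values).  The FIELD(x) accessors
 * mask the field in place, so results compare directly against the
 * shifted *_NONE/*_BASE/*_IMPL constants, while helpers such as
 * ID_AA64DFR0_BRPS() extract the raw value and add the +1 bias.
 */
static void
example_id_regs(uint64_t dfr0, uint64_t isar0)
{

	/* Compare in place: neither side needs shifting. */
	if (ID_AA64ISAR0_AES(isar0) == ID_AA64ISAR0_AES_PMULL)
		printf("AES + PMULL instructions present\n");

	/* BRPS/WRPS are encoded minus one; the accessors add it back. */
	printf("%d breakpoints, %d watchpoints\n",
	    (int)ID_AA64DFR0_BRPS(dfr0), (int)ID_AA64DFR0_WRPS(dfr0));
}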
#define ID_AA64MMFR0_BIGEND_EL0_MASK (0xf << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0(x) ((x) & ID_AA64MMFR0_BIGEND_EL0_MASK) #define ID_AA64MMFR0_BIGEND_EL0_FIXED (0x0 << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0_MIXED (0x1 << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_TGRAN16_SHIFT 20 #define ID_AA64MMFR0_TGRAN16_MASK (0xf << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN16(x) ((x) & ID_AA64MMFR0_TGRAN16_MASK) #define ID_AA64MMFR0_TGRAN16_NONE (0x0 << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN16_IMPL (0x1 << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN64_SHIFT 24 #define ID_AA64MMFR0_TGRAN64_MASK (0xf << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN64(x) ((x) & ID_AA64MMFR0_TGRAN64_MASK) #define ID_AA64MMFR0_TGRAN64_IMPL (0x0 << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN64_NONE (0xf << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN4_MASK (0xf << ID_AA64MMFR0_TGRAN4_SHIFT) #define ID_AA64MMFR0_TGRAN4(x) ((x) & ID_AA64MMFR0_TGRAN4_MASK) #define ID_AA64MMFR0_TGRAN4_IMPL (0x0 << ID_AA64MMFR0_TGRAN4_SHIFT) #define ID_AA64MMFR0_TGRAN4_NONE (0xf << ID_AA64MMFR0_TGRAN4_SHIFT) /* ID_AA64MMFR1_EL1 */ #define ID_AA64MMFR1_MASK 0x00ffffff #define ID_AA64MMFR1_HAFDBS_SHIFT 0 #define ID_AA64MMFR1_HAFDBS_MASK (0xf << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS(x) ((x) & ID_AA64MMFR1_HAFDBS_MASK) #define ID_AA64MMFR1_HAFDBS_NONE (0x0 << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS_AF (0x1 << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS_AF_DBS (0x2 << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_VMIDBITS_SHIFT 4 #define ID_AA64MMFR1_VMIDBITS_MASK (0xf << ID_AA64MMFR1_VMIDBITS_SHIFT) #define ID_AA64MMFR1_VMIDBITS(x) ((x) & ID_AA64MMFR1_VMIDBITS_MASK) #define ID_AA64MMFR1_VMIDBITS_8 (0x0 << ID_AA64MMFR1_VMIDBITS_SHIFT) #define ID_AA64MMFR1_VMIDBITS_16 (0x2 << ID_AA64MMFR1_VMIDBITS_SHIFT) #define ID_AA64MMFR1_VH_SHIFT 8 #define ID_AA64MMFR1_VH_MASK (0xf << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_VH(x) ((x) & ID_AA64MMFR1_VH_MASK) #define ID_AA64MMFR1_VH_NONE (0x0 << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_VH_IMPL (0x1 << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_HPDS_SHIFT 12 #define ID_AA64MMFR1_HPDS_MASK (0xf << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_HPDS(x) ((x) & ID_AA64MMFR1_HPDS_MASK) #define ID_AA64MMFR1_HPDS_NONE (0x0 << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_HPDS_IMPL (0x1 << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_LO_SHIFT 16 #define ID_AA64MMFR1_LO_MASK (0xf << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_LO(x) ((x) & ID_AA64MMFR1_LO_MASK) #define ID_AA64MMFR1_LO_NONE (0x0 << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_LO_IMPL (0x1 << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_PAN_SHIFT 20 #define ID_AA64MMFR1_PAN_MASK (0xf << ID_AA64MMFR1_PAN_SHIFT) #define ID_AA64MMFR1_PAN(x) ((x) & ID_AA64MMFR1_PAN_MASK) #define ID_AA64MMFR1_PAN_NONE (0x0 << ID_AA64MMFR1_PAN_SHIFT) #define ID_AA64MMFR1_PAN_IMPL (0x1 << ID_AA64MMFR1_PAN_SHIFT) /* ID_AA64PFR0_EL1 */ #define ID_AA64PFR0_MASK 0x0fffffff #define ID_AA64PFR0_EL0_SHIFT 0 #define ID_AA64PFR0_EL0_MASK (0xf << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0(x) ((x) & ID_AA64PFR0_EL0_MASK) #define ID_AA64PFR0_EL0_64 (1 << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0_64_32 (2 << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL1_SHIFT 4 #define ID_AA64PFR0_EL1_MASK (0xf << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1(x) ((x) & ID_AA64PFR0_EL1_MASK) 
#define ID_AA64PFR0_EL1_64 (1 << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1_64_32 (2 << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL2_SHIFT 8 #define ID_AA64PFR0_EL2_MASK (0xf << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2(x) ((x) & ID_AA64PFR0_EL2_MASK) #define ID_AA64PFR0_EL2_NONE (0 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64 (1 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64_32 (2 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL3_SHIFT 12 #define ID_AA64PFR0_EL3_MASK (0xf << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3(x) ((x) & ID_AA64PFR0_EL3_MASK) #define ID_AA64PFR0_EL3_NONE (0 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64 (1 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64_32 (2 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_FP_SHIFT 16 #define ID_AA64PFR0_FP_MASK (0xf << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP(x) ((x) & ID_AA64PFR0_FP_MASK) #define ID_AA64PFR0_FP_IMPL (0x0 << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP_NONE (0xf << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_ADV_SIMD_SHIFT 20 #define ID_AA64PFR0_ADV_SIMD_MASK (0xf << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_ADV_SIMD(x) ((x) & ID_AA64PFR0_ADV_SIMD_MASK) #define ID_AA64PFR0_ADV_SIMD_IMPL (0x0 << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_ADV_SIMD_NONE (0xf << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_GIC_BITS 0x4 /* Number of bits in GIC field */ #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_GIC_MASK (0xf << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC(x) ((x) & ID_AA64PFR0_GIC_MASK) #define ID_AA64PFR0_GIC_CPUIF_NONE (0x0 << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC_CPUIF_EN (0x1 << ID_AA64PFR0_GIC_SHIFT) /* MAIR_EL1 - Memory Attribute Indirection Register */ #define MAIR_ATTR_MASK(idx) (0xff << ((idx) * 8)) #define MAIR_ATTR(attr, idx) ((attr) << ((idx) * 8)) #define MAIR_DEVICE_nGnRnE 0x00 #define MAIR_NORMAL_NC 0x44 #define MAIR_NORMAL_WT 0x88 #define MAIR_NORMAL_WB 0xff /* PAR_EL1 - Physical Address Register */ #define PAR_F_SHIFT 0 #define PAR_F (0x1 << PAR_F_SHIFT) #define PAR_SUCCESS(x) (((x) & PAR_F) == 0) /* When PAR_F == 0 (success) */ #define PAR_SH_SHIFT 7 #define PAR_SH_MASK (0x3 << PAR_SH_SHIFT) #define PAR_NS_SHIFT 9 #define PAR_NS_MASK (0x3 << PAR_NS_SHIFT) #define PAR_PA_SHIFT 12 #define PAR_PA_MASK 0x0000fffffffff000 #define PAR_ATTR_SHIFT 56 #define PAR_ATTR_MASK (0xff << PAR_ATTR_SHIFT) /* When PAR_F == 1 (aborted) */ #define PAR_FST_SHIFT 1 #define PAR_FST_MASK (0x3f << PAR_FST_SHIFT) #define PAR_PTW_SHIFT 8 #define PAR_PTW_MASK (0x1 << PAR_PTW_SHIFT) #define PAR_S_SHIFT 9 #define PAR_S_MASK (0x1 << PAR_S_SHIFT) /* SCTLR_EL1 - System Control Register */ #define SCTLR_RES0 0xc8222400 /* Reserved, write 0 */ #define SCTLR_RES1 0x30d00800 /* Reserved, write 1 */ #define SCTLR_M 0x00000001 #define SCTLR_A 0x00000002 #define SCTLR_C 0x00000004 #define SCTLR_SA 0x00000008 #define SCTLR_SA0 0x00000010 #define SCTLR_CP15BEN 0x00000020 #define SCTLR_THEE 0x00000040 #define SCTLR_ITD 0x00000080 #define SCTLR_SED 0x00000100 #define SCTLR_UMA 0x00000200 #define SCTLR_I 0x00001000 #define SCTLR_DZE 0x00004000 #define SCTLR_UCT 0x00008000 #define SCTLR_nTWI 0x00010000 #define SCTLR_nTWE 0x00040000 #define SCTLR_WXN 0x00080000 #define SCTLR_EOE 0x01000000 #define SCTLR_EE 0x02000000 #define SCTLR_UCI 0x04000000 /* SPSR_EL1 */ /* * When the exception is taken in AArch64: * M[4] is 0 for AArch64 mode * M[3:2] is the exception level * M[1] is unused * M[0] is the SP select: * 0: always SP0 * 1: current EL's SP */ #define PSR_M_EL0t 0x00000000
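/*
 * Worked example of the M-field layout documented above: an exception
 * taken from EL1 while on the kernel (SPx) stack reads back
 * SPSR_EL1.M = 0b00101 -- M[4] = 0 (AArch64), M[3:2] = 0b01 (EL1),
 * M[0] = 1 (current EL's SP) -- i.e. the PSR_M_EL1h value defined
 * below, so (spsr & PSR_M_MASK) == PSR_M_EL1h identifies that state.
 */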
#define PSR_M_EL1t 0x00000004 #define PSR_M_EL1h 0x00000005 #define PSR_M_EL2t 0x00000008 #define PSR_M_EL2h 0x00000009 #define PSR_M_MASK 0x0000001f #define PSR_F 0x00000040 #define PSR_I 0x00000080 #define PSR_A 0x00000100 #define PSR_D 0x00000200 #define PSR_IL 0x00100000 #define PSR_SS 0x00200000 #define PSR_V 0x10000000 #define PSR_C 0x20000000 #define PSR_Z 0x40000000 #define PSR_N 0x80000000 /* TCR_EL1 - Translation Control Register */ #define TCR_ASID_16 (1 << 36) #define TCR_IPS_SHIFT 32 #define TCR_IPS_32BIT (0 << TCR_IPS_SHIFT) #define TCR_IPS_36BIT (1 << TCR_IPS_SHIFT) #define TCR_IPS_40BIT (2 << TCR_IPS_SHIFT) #define TCR_IPS_42BIT (3 << TCR_IPS_SHIFT) #define TCR_IPS_44BIT (4 << TCR_IPS_SHIFT) #define TCR_IPS_48BIT (5 << TCR_IPS_SHIFT) #define TCR_TG1_SHIFT 30 #define TCR_TG1_16K (1 << TCR_TG1_SHIFT) #define TCR_TG1_4K (2 << TCR_TG1_SHIFT) #define TCR_TG1_64K (3 << TCR_TG1_SHIFT) #define TCR_SH1_SHIFT 28 #define TCR_SH1_IS (0x3UL << TCR_SH1_SHIFT) #define TCR_ORGN1_SHIFT 26 #define TCR_ORGN1_WBWA (0x1UL << TCR_ORGN1_SHIFT) #define TCR_IRGN1_SHIFT 24 #define TCR_IRGN1_WBWA (0x1UL << TCR_IRGN1_SHIFT) #define TCR_SH0_SHIFT 12 #define TCR_SH0_IS (0x3UL << TCR_SH0_SHIFT) #define TCR_ORGN0_SHIFT 10 #define TCR_ORGN0_WBWA (0x1UL << TCR_ORGN0_SHIFT) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_WBWA (0x1UL << TCR_IRGN0_SHIFT) #define TCR_CACHE_ATTRS ((TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) |\ (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)) #ifdef SMP #define TCR_SMP_ATTRS (TCR_SH0_IS | TCR_SH1_IS) #else #define TCR_SMP_ATTRS 0 #endif #define TCR_T1SZ_SHIFT 16 #define TCR_T0SZ_SHIFT 0 #define TCR_T1SZ(x) ((x) << TCR_T1SZ_SHIFT) #define TCR_T0SZ(x) ((x) << TCR_T0SZ_SHIFT) #define TCR_TxSZ(x) (TCR_T1SZ(x) | TCR_T0SZ(x)) /* Saved Program Status Register */ #define DBG_SPSR_SS (0x1 << 21) /* Monitor Debug System Control Register */ #define DBG_MDSCR_SS (0x1 << 0) #define DBG_MDSCR_KDE (0x1 << 13) #define DBG_MDSCR_MDE (0x1 << 15) /* Performance Monitoring Counters */ #define PMCR_E (1 << 0) /* Enable all counters */ #define PMCR_P (1 << 1) /* Reset all counters */ #define PMCR_C (1 << 2) /* Clock counter reset */ #define PMCR_D (1 << 3) /* CNTR counts every 64 clk cycles */ #define PMCR_X (1 << 4) /* Export to ext. monitoring (ETM) */ #define PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug */ #define PMCR_LC (1 << 6) /* Long cycle count enable */ #define PMCR_IMP_SHIFT 24 /* Implementer code */ #define PMCR_IMP_MASK (0xff << PMCR_IMP_SHIFT) #define PMCR_IDCODE_SHIFT 16 /* Identification code */ #define PMCR_IDCODE_MASK (0xff << PMCR_IDCODE_SHIFT) #define PMCR_IDCODE_CORTEX_A57 0x01 #define PMCR_IDCODE_CORTEX_A72 0x02 #define PMCR_IDCODE_CORTEX_A53 0x03 #define PMCR_N_SHIFT 11 /* Number of counters implemented */ #define PMCR_N_MASK (0x1f << PMCR_N_SHIFT) #endif /* !_MACHINE_ARMREG_H_ */ Index: projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c =================================================================== --- projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (revision 309262) +++ projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (revision 309263) @@ -1,1164 +1,1171 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) && defined(_KERNEL) #include #include #endif /* * ZFS Write Throttle * ------------------ * * ZFS must limit the rate of incoming writes to the rate at which it is able * to sync data modifications to the backend storage. Throttling by too much * creates an artificial limit; throttling by too little can only be sustained * for short periods and would lead to highly lumpy performance. On a per-pool * basis, ZFS tracks the amount of modified (dirty) data. As operations change * data, the amount of dirty data increases; as ZFS syncs out data, the amount * of dirty data decreases. When the amount of dirty data exceeds a * predetermined threshold further modifications are blocked until the amount * of dirty data decreases (as data is synced out). * * The limit on dirty data is tunable, and should be adjusted according to * both the IO capacity and available memory of the system. The larger the * window, the more ZFS is able to aggregate and amortize metadata (and data) * changes. However, memory is a limited resource, and allowing for more dirty * data comes at the cost of keeping other useful data in memory (for example * ZFS data cached by the ARC). * * Implementation * * As buffers are modified dsl_pool_willuse_space() increments both the per- * txg (dp_dirty_pertxg[]) and poolwide (dp_dirty_total) accounting of * dirty space used; dsl_pool_dirty_space() decrements those values as data * is synced out from dsl_pool_sync(). While only the poolwide value is * relevant, the per-txg value is useful for debugging. The tunable * zfs_dirty_data_max determines the dirty space limit. Once that value is * exceeded, new writes are halted until space frees up. * * The zfs_dirty_data_sync tunable dictates the threshold at which we * ensure that there is a txg syncing (see the comment in txg.c for a full * description of transaction group stages). * * The IO scheduler uses both the dirty space limit and current amount of * dirty data as inputs. Those values affect the number of concurrent IOs ZFS * issues. See the comment in vdev_queue.c for details of the IO scheduler. * * The delay is also calculated based on the amount of dirty data. See the * comment above dmu_tx_delay() for details. */ /* * zfs_dirty_data_max will be set to zfs_dirty_data_max_percent% of all memory, * capped at zfs_dirty_data_max_max. It can also be overridden in /etc/system. 
*/ uint64_t zfs_dirty_data_max; uint64_t zfs_dirty_data_max_max = 4ULL * 1024 * 1024 * 1024; int zfs_dirty_data_max_percent = 10; /* * If there is at least this much dirty data, push out a txg. */ uint64_t zfs_dirty_data_sync = 64 * 1024 * 1024; /* * Once there is this amount of dirty data, dmu_tx_delay() will kick in * and delay each transaction. * This value should be >= zfs_vdev_async_write_active_max_dirty_percent. */ int zfs_delay_min_dirty_percent = 60; /* * This controls how quickly the delay approaches infinity. * Larger values cause it to delay more for a given amount of dirty data. * Therefore larger values will cause there to be less dirty data for a * given throughput. * * For the smoothest delay, this value should be about 1 billion divided * by the maximum number of operations per second. This will smoothly * handle between 10x and 1/10th this number. * * Note: zfs_delay_scale * zfs_dirty_data_max must be < 2^64, due to the * multiply in dmu_tx_delay(). */ uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000; #if defined(__FreeBSD__) && defined(_KERNEL) extern int zfs_vdev_async_write_active_max_dirty_percent; SYSCTL_DECL(_vfs_zfs); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_max, CTLFLAG_RWTUN, &zfs_dirty_data_max, 0, "The maximum amount of dirty data in bytes after which new writes are " "halted until space becomes available"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_max_max, CTLFLAG_RDTUN, &zfs_dirty_data_max_max, 0, "The absolute cap on dirty_data_max when auto calculating"); static int sysctl_zfs_dirty_data_max_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs, OID_AUTO, dirty_data_max_percent, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_dirty_data_max_percent, "I", "The percent of physical memory used to auto calculate dirty_data_max"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_sync, CTLFLAG_RWTUN, &zfs_dirty_data_sync, 0, "Force a txg if the number of dirty buffer bytes exceeds this value"); static int sysctl_zfs_delay_min_dirty_percent(SYSCTL_HANDLER_ARGS); /* No zfs_delay_min_dirty_percent tunable due to limit requirements */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, delay_min_dirty_percent, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(int), sysctl_zfs_delay_min_dirty_percent, "I", "The limit of outstanding dirty data before transactions are delayed"); static int sysctl_zfs_delay_scale(SYSCTL_HANDLER_ARGS); /* No zfs_delay_scale tunable due to limit requirements */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, delay_scale, CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t), sysctl_zfs_delay_scale, "QU", "Controls how quickly the delay approaches infinity"); static int sysctl_zfs_dirty_data_max_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_dirty_data_max_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100) return (EINVAL); zfs_dirty_data_max_percent = val; return (0); } static int sysctl_zfs_delay_min_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_delay_min_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < zfs_vdev_async_write_active_max_dirty_percent) return (EINVAL); zfs_delay_min_dirty_percent = val; return (0); } static int sysctl_zfs_delay_scale(SYSCTL_HANDLER_ARGS) { uint64_t val; int err; val = zfs_delay_scale; err = sysctl_handle_64(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val > UINT64_MAX / zfs_dirty_data_max) return
(EINVAL); zfs_delay_scale = val; return (0); } #endif hrtime_t zfs_throttle_delay = MSEC2NSEC(10); hrtime_t zfs_throttle_resolution = MSEC2NSEC(10); int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) { uint64_t obj; int err; err = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(dp->dp_root_dir)->dd_child_dir_zapobj, name, sizeof (obj), 1, &obj); if (err) return (err); return (dsl_dir_hold_obj(dp, obj, name, dp, ddp)); } static dsl_pool_t * dsl_pool_open_impl(spa_t *spa, uint64_t txg) { dsl_pool_t *dp; blkptr_t *bp = spa_get_rootblkptr(spa); dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); dp->dp_spa = spa; dp->dp_meta_rootbp = *bp; rrw_init(&dp->dp_config_rwlock, B_TRUE); txg_init(dp, txg); txg_list_create(&dp->dp_dirty_datasets, offsetof(dsl_dataset_t, ds_dirty_link)); txg_list_create(&dp->dp_dirty_zilogs, offsetof(zilog_t, zl_dirty_link)); txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, offsetof(dsl_sync_task_t, dst_node)); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL); dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri, 1, 4, 0); return (dp); } int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &dp->dp_meta_objset); if (err != 0) dsl_pool_close(dp); else *dpp = dp; return (err); } int dsl_pool_open(dsl_pool_t *dp) { int err; dsl_dir_t *dd; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); if (err) goto out; if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) { err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; err = dsl_dataset_hold_obj(dp, dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds); if (err == 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } dsl_dir_rele(dd, dp); if (err) goto out; } if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir); if (err) goto out; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } /* * Note: errors ignored, because the leak dir will not exist if we * have not encountered a leak yet. 
*/ (void) dsl_pool_open_special_dir(dp, LEAK_DIR_NAME, &dp->dp_leak_dir); if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj); if (err != 0) goto out; } if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMPTY_BPOBJ)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, &dp->dp_empty_bpobj); if (err != 0) goto out; } err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj); if (err == ENOENT) err = 0; if (err) goto out; err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: rrw_exit(&dp->dp_config_rwlock, FTAG); return (err); } void dsl_pool_close(dsl_pool_t *dp) { /* * Drop our references from dsl_pool_open(). * * Since we held the origin_snap from "syncing" context (which * includes pool-opening context), it actually only got a "ref" * and not a hold, so just drop that here. */ if (dp->dp_origin_snap) dsl_dataset_rele(dp->dp_origin_snap, dp); if (dp->dp_mos_dir) dsl_dir_rele(dp->dp_mos_dir, dp); if (dp->dp_free_dir) dsl_dir_rele(dp->dp_free_dir, dp); if (dp->dp_leak_dir) dsl_dir_rele(dp->dp_leak_dir, dp); if (dp->dp_root_dir) dsl_dir_rele(dp->dp_root_dir, dp); bpobj_close(&dp->dp_free_bpobj); /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ if (dp->dp_meta_objset) dmu_objset_evict(dp->dp_meta_objset); txg_list_destroy(&dp->dp_dirty_datasets); txg_list_destroy(&dp->dp_dirty_zilogs); txg_list_destroy(&dp->dp_sync_tasks); txg_list_destroy(&dp->dp_dirty_dirs); /* * We can't set retry to TRUE since we're explicitly specifying * a spa to flush. This is good enough; any missed buffers for * this spa won't cause trouble, and they'll eventually fall * out of the ARC just like any other unused buffer. 
*/ arc_flush(dp->dp_spa, FALSE); txg_fini(dp); dsl_scan_fini(dp); dmu_buf_user_evict_wait(); rrw_destroy(&dp->dp_config_rwlock); mutex_destroy(&dp->dp_lock); taskq_destroy(dp->dp_vnrele_taskq); if (dp->dp_blkstats) kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); kmem_free(dp, sizeof (dsl_pool_t)); } dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); objset_t *os; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT0(err); /* Initialize scan structures */ VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); rrw_exit(&ds->ds_bp_rwlock, FTAG); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif dsl_dataset_rele(ds, FTAG); dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); return (dp); } /* * Account for the meta-objset space in its placeholder dsl_dir. */ void dsl_pool_mos_diduse_space(dsl_pool_t *dp, int64_t used, int64_t comp, int64_t uncomp) { ASSERT3U(comp, ==, uncomp); /* it's all metadata */ mutex_enter(&dp->dp_lock); dp->dp_mos_used_delta += used; dp->dp_mos_compressed_delta += comp; dp->dp_mos_uncompressed_delta += uncomp; mutex_exit(&dp->dp_lock); } static void dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx) { zio_t *zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dmu_objset_sync(dp->dp_meta_objset, zio, tx); VERIFY0(zio_wait(zio)); dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); } static void dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta) { ASSERT(MUTEX_HELD(&dp->dp_lock)); if (delta < 0) ASSERT3U(-delta, <=, dp->dp_dirty_total); dp->dp_dirty_total += delta; /* * Note: we signal even when increasing dp_dirty_total. * This ensures forward progress -- each thread wakes the next waiter. 
*/ if (dp->dp_dirty_total <= zfs_dirty_data_max) cv_signal(&dp->dp_spaceavail_cv); } void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) { zio_t *zio; dmu_tx_t *tx; dsl_dir_t *dd; dsl_dataset_t *ds; objset_t *mos = dp->dp_meta_objset; list_t synced_datasets; list_create(&synced_datasets, sizeof (dsl_dataset_t), offsetof(dsl_dataset_t, ds_synced_link)); tx = dmu_tx_create_assigned(dp, txg); /* * Write out all dirty blocks of dirty datasets. */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { /* * We must not sync any non-MOS datasets twice, because * we may have taken a snapshot of them. However, we * may sync newly-created datasets on pass 2. */ ASSERT(!list_link_active(&ds->ds_synced_link)); list_insert_tail(&synced_datasets, ds); dsl_dataset_sync(ds, zio, tx); } VERIFY0(zio_wait(zio)); /* * We have written all of the accounted dirty data, so our * dp_space_towrite should now be zero. However, some seldom-used * code paths do not adhere to this (e.g. dbuf_undirty(), also * rounding error in dbuf_write_physdone). * Shore up the accounting of any dirtied space now. */ dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); /* * After the data blocks have been written (ensured by the zio_wait() * above), update the user/group space accounting. */ for (ds = list_head(&synced_datasets); ds != NULL; ds = list_next(&synced_datasets, ds)) { dmu_objset_do_userquota_updates(ds->ds_objset, tx); } /* * Sync the datasets again to push out the changes due to * userspace updates. This must be done before we process the * sync tasks, so that any snapshots will have the correct * user accounting information (and we won't get confused * about which blocks are part of the snapshot). */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { ASSERT(list_link_active(&ds->ds_synced_link)); dmu_buf_rele(ds->ds_dbuf, ds); dsl_dataset_sync(ds, zio, tx); } VERIFY0(zio_wait(zio)); /* * Now that the datasets have been completely synced, we can * clean up our in-memory structures accumulated while syncing: * * - move dead blocks from the pending deadlist to the on-disk deadlist * - release hold from dsl_dataset_dirty() */ while ((ds = list_remove_head(&synced_datasets)) != NULL) { dsl_dataset_sync_done(ds, tx); } while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) { dsl_dir_sync(dd, tx); } /* * The MOS's space is accounted for in the pool/$MOS * (dp_mos_dir). We can't modify the mos while we're syncing * it, so we remember the deltas and apply them here. */ if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 || dp->dp_mos_uncompressed_delta != 0) { dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD, dp->dp_mos_used_delta, dp->dp_mos_compressed_delta, dp->dp_mos_uncompressed_delta, tx); dp->dp_mos_used_delta = 0; dp->dp_mos_compressed_delta = 0; dp->dp_mos_uncompressed_delta = 0; } if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { dsl_pool_sync_mos(dp, tx); } /* * If we modify a dataset in the same txg that we want to destroy it, * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it. * dsl_dir_destroy_check() will fail if there are unexpected holds. * Therefore, we want to sync the MOS (thus syncing the dd_dbuf * and clearing the hold on it) before we process the sync_tasks. 
* The MOS data dirtied by the sync_tasks will be synced on the next * pass. */ if (!txg_list_empty(&dp->dp_sync_tasks, txg)) { dsl_sync_task_t *dst; /* * No more sync tasks should have been added while we * were syncing. */ ASSERT3U(spa_sync_pass(dp->dp_spa), ==, 1); while ((dst = txg_list_remove(&dp->dp_sync_tasks, txg)) != NULL) dsl_sync_task_sync(dst, tx); } dmu_tx_commit(tx); DTRACE_PROBE2(dsl_pool_sync__done, dsl_pool_t *dp, dp, uint64_t, txg); } void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) { zilog_t *zilog; - while (zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg)) { + while (zilog = txg_list_head(&dp->dp_dirty_zilogs, txg)) { dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); + /* + * We don't remove the zilog from the dp_dirty_zilogs + * list until after we've cleaned it. This ensures that + * callers of zilog_is_dirty() receive an accurate + * answer when they are racing with the spa sync thread. + */ zil_clean(zilog, txg); + (void) txg_list_remove_this(&dp->dp_dirty_zilogs, zilog, txg); ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg)); dmu_buf_rele(ds->ds_dbuf, zilog); } ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); } /* * TRUE if the current thread is the tx_sync_thread or if we * are being called from SPA context during pool initialization. */ int dsl_pool_sync_context(dsl_pool_t *dp) { return (curthread == dp->dp_tx.tx_sync_thread || spa_is_initializing(dp->dp_spa)); } uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) { uint64_t space, resv; /* * If we're trying to assess whether it's OK to do a free, * cut the reservation in half to allow forward progress * (e.g. make it possible to rm(1) files from a full pool). */ space = spa_get_dspace(dp->dp_spa); resv = spa_get_slop_space(dp->dp_spa); if (netfree) resv >>= 1; return (space - resv); } boolean_t dsl_pool_need_dirty_delay(dsl_pool_t *dp) { uint64_t delay_min_bytes = zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; boolean_t rv; mutex_enter(&dp->dp_lock); if (dp->dp_dirty_total > zfs_dirty_data_sync) txg_kick(dp); rv = (dp->dp_dirty_total > delay_min_bytes); mutex_exit(&dp->dp_lock); return (rv); } void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) { if (space > 0) { mutex_enter(&dp->dp_lock); dp->dp_dirty_pertxg[tx->tx_txg & TXG_MASK] += space; dsl_pool_dirty_delta(dp, space); mutex_exit(&dp->dp_lock); } } void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg) { ASSERT3S(space, >=, 0); if (space == 0) return; mutex_enter(&dp->dp_lock); if (dp->dp_dirty_pertxg[txg & TXG_MASK] < space) { /* XXX writing something we didn't dirty? */ space = dp->dp_dirty_pertxg[txg & TXG_MASK]; } ASSERT3U(dp->dp_dirty_pertxg[txg & TXG_MASK], >=, space); dp->dp_dirty_pertxg[txg & TXG_MASK] -= space; ASSERT3U(dp->dp_dirty_total, >=, space); dsl_pool_dirty_delta(dp, -space); mutex_exit(&dp->dp_lock); } /* ARGSUSED */ static int upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds, *prev = NULL; int err; err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object) break; dsl_dataset_rele(ds, FTAG); ds = prev; prev = NULL; } if (prev == NULL) { prev = dp->dp_origin_snap; /* * The $ORIGIN can't have any data, or the accounting * will be wrong. 
*/ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth); rrw_exit(&ds->ds_bp_rwlock, FTAG); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { dsl_dataset_rele(ds, FTAG); return (0); } dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_prev_snap_obj = prev->ds_object; dsl_dataset_phys(ds)->ds_prev_snap_txg = dsl_dataset_phys(prev)->ds_creation_txg; dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); dsl_dir_phys(ds->ds_dir)->dd_origin_obj = prev->ds_object; dmu_buf_will_dirty(prev->ds_dbuf, tx); dsl_dataset_phys(prev)->ds_num_children++; if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) { ASSERT(ds->ds_prev == NULL); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); } } ASSERT3U(dsl_dir_phys(ds->ds_dir)->dd_origin_obj, ==, prev->ds_object); ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_obj, ==, prev->ds_object); if (dsl_dataset_phys(prev)->ds_next_clones_obj == 0) { dmu_buf_will_dirty(prev->ds_dbuf, tx); dsl_dataset_phys(prev)->ds_next_clones_obj = zap_create(dp->dp_meta_objset, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dataset_phys(prev)->ds_next_clones_obj, ds->ds_object, tx)); dsl_dataset_rele(ds, FTAG); if (prev != dp->dp_origin_snap) dsl_dataset_rele(prev, FTAG); return (0); } void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap != NULL); VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE)); } /* ARGSUSED */ static int upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { dmu_tx_t *tx = arg; objset_t *mos = dp->dp_meta_objset; if (dsl_dir_phys(ds->ds_dir)->dd_origin_obj != 0) { dsl_dataset_t *origin; VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &origin)); if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); dsl_dir_phys(origin->ds_dir)->dd_clones = zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dir_phys(origin->ds_dir)->dd_clones, ds->ds_object, tx)); dsl_dataset_rele(origin, FTAG); } return (0); } void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); uint64_t obj; (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* * We can't use bpobj_alloc(), because spa_version() still * returns the old version, and we need a new-version bpobj with * subobj support. So call dmu_object_alloc() directly. 
*/ obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, SPA_OLD_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE)); } void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) { uint64_t dsobj; dsl_dataset_t *ds; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap == NULL); ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER)); /* create the origin dir, ds, & snap-ds */ dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, NULL, 0, kcred, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap)); dsl_dataset_rele(ds, FTAG); } taskq_t * dsl_pool_vnrele_taskq(dsl_pool_t *dp) { return (dp->dp_vnrele_taskq); } /* * Walk through the pool-wide zap object of temporary snapshot user holds * and release them. */ void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) { zap_attribute_t za; zap_cursor_t zc; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; nvlist_t *holds; if (zapobj == 0) return; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); holds = fnvlist_alloc(); for (zap_cursor_init(&zc, mos, zapobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { char *htag; nvlist_t *tags; htag = strchr(za.za_name, '-'); *htag = '\0'; ++htag; if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) { tags = fnvlist_alloc(); fnvlist_add_boolean(tags, htag); fnvlist_add_nvlist(holds, za.za_name, tags); fnvlist_free(tags); } else { fnvlist_add_boolean(tags, htag); } } dsl_dataset_user_release_tmp(dp, holds); fnvlist_free(holds); zap_cursor_fini(&zc); } /* * Create the pool-wide zap object for storing temporary snapshot holds. */ void dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; ASSERT(dp->dp_tmp_userrefs_obj == 0); ASSERT(dmu_tx_is_syncing(tx)); dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx); } static int dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding) { objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; char *name; int error; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); ASSERT(dmu_tx_is_syncing(tx)); /* * If the pool was created prior to SPA_VERSION_USERREFS, the * zap object for temporary holds might not exist yet. */ if (zapobj == 0) { if (holding) { dsl_pool_user_hold_create_obj(dp, tx); zapobj = dp->dp_tmp_userrefs_obj; } else { return (SET_ERROR(ENOENT)); } } name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); if (holding) error = zap_add(mos, zapobj, name, 8, 1, &now, tx); else error = zap_remove(mos, zapobj, name, tx); strfree(name); return (error); } /* * Add a temporary hold for the given dataset object and tag. */ int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, uint64_t now, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); } /* * Release a temporary hold for the given dataset object and tag. 
*/ int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0, tx, B_FALSE)); } /* * DSL Pool Configuration Lock * * The dp_config_rwlock protects against changes to DSL state (e.g. dataset * creation / destruction / rename / property setting). It must be held for * read to hold a dataset or dsl_dir. I.e. you must call * dsl_pool_config_enter() or dsl_pool_hold() before calling * dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock * must be held continuously until all datasets and dsl_dirs are released. * * The only exception to this rule is that if a "long hold" is placed on * a dataset, then the dp_config_rwlock may be dropped while the dataset * is still held. The long hold will prevent the dataset from being * destroyed -- the destroy will fail with EBUSY. A long hold can be * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset * (by calling dsl_{dataset,objset}_{try}own{_obj}). * * Legitimate long-holders (including owners) should be long-running, cancelable * tasks that should cause "zfs destroy" to fail. This includes DMU * consumers (i.e. a ZPL filesystem being mounted or ZVOL being open), * "zfs send", and "zfs diff". There are several other long-holders whose * uses are suboptimal (e.g. "zfs promote", and zil_suspend()). * * The usual formula for long-holding would be: * dsl_pool_hold() * dsl_dataset_hold() * ... perform checks ... * dsl_dataset_long_hold() * dsl_pool_rele() * ... perform long-running task ... * dsl_dataset_long_rele() * dsl_dataset_rele() * * Note that when the long hold is released, the dataset is still held but * the pool is not held. The dataset may change arbitrarily during this time * (e.g. it could be destroyed). Therefore you shouldn't do anything to the * dataset except release it. * * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only * or modifying operations. * * Modifying operations should generally use dsl_sync_task(). The synctask * infrastructure enforces proper locking strategy with respect to the * dp_config_rwlock. See the comment above dsl_sync_task() for details. * * Read-only operations will manually hold the pool, then the dataset, obtain * information from the dataset, then release the pool and dataset. * dmu_objset_{hold,rele}() are convenience routines that also do the pool * hold/rele. */ int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp) { spa_t *spa; int error; error = spa_open(name, &spa, tag); if (error == 0) { *dp = spa_get_dsl(spa); dsl_pool_config_enter(*dp, tag); } return (error); } void dsl_pool_rele(dsl_pool_t *dp, void *tag) { dsl_pool_config_exit(dp, tag); spa_close(dp->dp_spa, tag); } void dsl_pool_config_enter(dsl_pool_t *dp, void *tag) { /* * We use a "reentrant" reader-writer lock, but not reentrantly. * * The rrwlock can (with the track_all flag) track all reading threads, * which is very useful for debugging which code path failed to release * the lock, and for verifying that the *current* thread does hold * the lock. * * (Unlike a rwlock, which knows that N threads hold it for * read, but not *which* threads, so rw_held(RW_READER) returns TRUE * if any thread holds it for read, even if this thread doesn't). 
*/ ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); } void dsl_pool_config_enter_prio(dsl_pool_t *dp, void *tag) { ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); rrw_enter_read_prio(&dp->dp_config_rwlock, tag); } void dsl_pool_config_exit(dsl_pool_t *dp, void *tag) { rrw_exit(&dp->dp_config_rwlock, tag); } boolean_t dsl_pool_config_held(dsl_pool_t *dp) { return (RRW_LOCK_HELD(&dp->dp_config_rwlock)); } boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp) { return (RRW_WRITE_HELD(&dp->dp_config_rwlock)); } Index: projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c =================================================================== --- projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (revision 309262) +++ projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (revision 309263) @@ -1,7351 +1,7356 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Martin Matuska . All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome */ /* * SPA: Storage Pool Allocator * * This file contains all the routines used when modifying on-disk SPA state. * This includes opening, importing, destroying, exporting a pool, and syncing a * pool. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #include #endif /* _KERNEL */ #include "zfs_prop.h" #include "zfs_comutil.h" /* Check hostid on import? */ static int check_hostid = 1; /* * The interval, in seconds, at which failed configuration cache file writes * should be retried. 
*/ static int zfs_ccw_retry_interval = 300; SYSCTL_DECL(_vfs_zfs); SYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RWTUN, &check_hostid, 0, "Check hostid on import?"); TUNABLE_INT("vfs.zfs.ccw_retry_interval", &zfs_ccw_retry_interval); SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RW, &zfs_ccw_retry_interval, 0, "Configuration cache file write, retry after failure, interval (seconds)"); typedef enum zti_modes { ZTI_MODE_FIXED, /* value is # of threads (min 1) */ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ ZTI_MODE_NULL, /* don't create a taskq */ ZTI_NMODES } zti_modes_t; #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } #define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 } #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 } #define ZTI_N(n) ZTI_P(n, 1) #define ZTI_ONE ZTI_N(1) typedef struct zio_taskq_info { zti_modes_t zti_mode; uint_t zti_value; uint_t zti_count; } zio_taskq_info_t; static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { "issue", "issue_high", "intr", "intr_high" }; /* * This table defines the taskq settings for each ZFS I/O type. When * initializing a pool, we use this table to create an appropriately sized * taskq. Some operations are low volume and therefore have a small, static * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE * macros. Other operations process a large amount of data; the ZTI_BATCH * macro causes us to create a taskq oriented for throughput. Some operations * are so high frequency and short-lived that the taskq itself can become a * point of lock contention. The ZTI_P(#, #) macro indicates that we need an * additional degree of parallelism specified by the number of threads per-taskq * and the number of taskqs; when dispatching an event in this case, the * particular taskq is chosen at random. * * The different taskq priorities are to handle the different contexts (issue * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that * need to be handled with minimum delay. */ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ { ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ { ZTI_BATCH, ZTI_N(5), ZTI_N(8), ZTI_N(5) }, /* WRITE */ { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ }; static sysevent_t *spa_event_create(spa_t *spa, vdev_t *vd, const char *name); static void spa_event_post(sysevent_t *ev); static void spa_sync_version(void *arg, dmu_tx_t *tx); static void spa_sync_props(void *arg, dmu_tx_t *tx); static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, char **ereport); static void spa_vdev_resilver_done(spa_t *spa); uint_t zio_taskq_batch_pct = 75; /* 1 thread per cpu in pset */ #ifdef PSRSET_BIND id_t zio_taskq_psrset_bind = PS_NONE; #endif #ifdef SYSDC boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ #endif uint_t zio_taskq_basedc = 80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ extern int zfs_sync_pass_deferred_free; #ifndef illumos extern void spa_deadman(void *arg); #endif /* * This (illegal) pool name is used when temporarily importing a spa_t in order * to get the vdev stats associated with the imported devices.
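 *
 * (Context, for illustration: pool names are required to start with a
 * letter, so "$import" can never clash with a real pool. The tryimport
 * path loads the pool under this temporary name to collect its config
 * and vdev stats, then discards it.)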
*/ #define TRYIMPORT_NAME "$import" /* * ========================================================================== * SPA properties routines * ========================================================================== */ /* * Add a (source=src, propname=propval) list to an nvlist. */ static void spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, uint64_t intval, zprop_source_t src) { const char *propname = zpool_prop_to_name(prop); nvlist_t *propval; VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); if (strval != NULL) VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); else VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); nvlist_free(propval); } /* * Get property values from the spa configuration. */ static void spa_prop_get_config(spa_t *spa, nvlist_t **nvp) { vdev_t *rvd = spa->spa_root_vdev; dsl_pool_t *pool = spa->spa_dsl_pool; uint64_t size, alloc, cap, version; zprop_source_t src = ZPROP_SRC_NONE; spa_config_dirent_t *dp; metaslab_class_t *mc = spa_normal_class(spa); ASSERT(MUTEX_HELD(&spa->spa_props_lock)); if (rvd != NULL) { alloc = metaslab_class_get_alloc(spa_normal_class(spa)); size = metaslab_class_get_space(spa_normal_class(spa)); spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, size - alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, metaslab_class_fragmentation(mc), src); spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, metaslab_class_expandable_space(mc), src); spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, (spa_mode(spa) == FREAD), src); cap = (size == 0) ? 0 : (alloc * 100 / size); spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, ddt_get_pool_dedup_ratio(spa), src); spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, rvd->vdev_state, src); version = spa_version(spa); if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) src = ZPROP_SRC_DEFAULT; else src = ZPROP_SRC_LOCAL; spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); } if (pool != NULL) { /* * The $FREE directory was introduced in SPA_VERSION_DEADLISTS; * when opening pools before this version, freedir will be NULL.
*/ if (pool->dp_free_dir != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, src); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, 0, src); } if (pool->dp_leak_dir != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, src); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, 0, src); } } spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); if (spa->spa_comment != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, 0, ZPROP_SRC_LOCAL); } if (spa->spa_root != NULL) spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 0, ZPROP_SRC_LOCAL); if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE); } if ((dp = list_head(&spa->spa_config_list)) != NULL) { if (dp->scd_path == NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, "none", 0, ZPROP_SRC_LOCAL); } else if (strcmp(dp->scd_path, spa_config_path) != 0) { spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, dp->scd_path, 0, ZPROP_SRC_LOCAL); } } } /* * Get zpool property values. */ int spa_prop_get(spa_t *spa, nvlist_t **nvp) { objset_t *mos = spa->spa_meta_objset; zap_cursor_t zc; zap_attribute_t za; int err; VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); mutex_enter(&spa->spa_props_lock); /* * Get properties from the spa config. */ spa_prop_get_config(spa, nvp); /* If no pool property object, no more prop to get. */ if (mos == NULL || spa->spa_pool_props_object == 0) { mutex_exit(&spa->spa_props_lock); return (0); } /* * Get properties from the MOS pool property object. */ for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); (err = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { uint64_t intval = 0; char *strval = NULL; zprop_source_t src = ZPROP_SRC_DEFAULT; zpool_prop_t prop; if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) continue; switch (za.za_integer_length) { case 8: /* integer property */ if (za.za_first_integer != zpool_prop_default_numeric(prop)) src = ZPROP_SRC_LOCAL; if (prop == ZPOOL_PROP_BOOTFS) { dsl_pool_t *dp; dsl_dataset_t *ds = NULL; dp = spa_get_dsl(spa); dsl_pool_config_enter(dp, FTAG); if (err = dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &ds)) { dsl_pool_config_exit(dp, FTAG); break; } strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); dsl_pool_config_exit(dp, FTAG); } else { strval = NULL; intval = za.za_first_integer; } spa_prop_add_list(*nvp, prop, strval, intval, src); if (strval != NULL) kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); break; case 1: /* string property */ strval = kmem_alloc(za.za_num_integers, KM_SLEEP); err = zap_lookup(mos, spa->spa_pool_props_object, za.za_name, 1, za.za_num_integers, strval); if (err) { kmem_free(strval, za.za_num_integers); break; } spa_prop_add_list(*nvp, prop, strval, 0, src); kmem_free(strval, za.za_num_integers); break; default: break; } } zap_cursor_fini(&zc); mutex_exit(&spa->spa_props_lock); out: if (err && err != ENOENT) { nvlist_free(*nvp); *nvp = NULL; return (err); } return (0); } /* * Validate the given pool properties nvlist and modify the list * for the property values to be set. 
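 *
 * For illustration (a hypothetical caller; "tank/ROOT" is a made-up
 * dataset name), the nvlist arriving here is keyed by property name:
 *
 *    nvlist_t *props;
 *    VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 *    VERIFY(nvlist_add_string(props,
 *        zpool_prop_to_name(ZPOOL_PROP_BOOTFS), "tank/ROOT") == 0);
 *    error = spa_prop_set(spa, props);
 *
 * Validation may rewrite entries in place, e.g. replacing the bootfs
 * string with the dataset's object number (see reset_bootfs below).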
*/ static int spa_prop_validate(spa_t *spa, nvlist_t *props) { nvpair_t *elem; int error = 0, reset_bootfs = 0; uint64_t objnum = 0; boolean_t has_feature = B_FALSE; elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { uint64_t intval; char *strval, *slash, *check, *fname; const char *propname = nvpair_name(elem); zpool_prop_t prop = zpool_name_to_prop(propname); switch (prop) { case ZPROP_INVAL: if (!zpool_prop_feature(propname)) { error = SET_ERROR(EINVAL); break; } /* * Sanitize the input. */ if (nvpair_type(elem) != DATA_TYPE_UINT64) { error = SET_ERROR(EINVAL); break; } if (nvpair_value_uint64(elem, &intval) != 0) { error = SET_ERROR(EINVAL); break; } if (intval != 0) { error = SET_ERROR(EINVAL); break; } fname = strchr(propname, '@') + 1; if (zfeature_lookup_name(fname, NULL) != 0) { error = SET_ERROR(EINVAL); break; } has_feature = B_TRUE; break; case ZPOOL_PROP_VERSION: error = nvpair_value_uint64(elem, &intval); if (!error && (intval < spa_version(spa) || intval > SPA_VERSION_BEFORE_FEATURES || has_feature)) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_DELEGATION: case ZPOOL_PROP_AUTOREPLACE: case ZPOOL_PROP_LISTSNAPS: case ZPOOL_PROP_AUTOEXPAND: error = nvpair_value_uint64(elem, &intval); if (!error && intval > 1) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_BOOTFS: /* * If the pool version is less than SPA_VERSION_BOOTFS, * or the pool is still being created (version == 0), * the bootfs property cannot be set. */ if (spa_version(spa) < SPA_VERSION_BOOTFS) { error = SET_ERROR(ENOTSUP); break; } /* * Make sure the vdev config is bootable */ if (!vdev_is_bootable(spa->spa_root_vdev)) { error = SET_ERROR(ENOTSUP); break; } reset_bootfs = 1; error = nvpair_value_string(elem, &strval); if (!error) { objset_t *os; uint64_t propval; if (strval == NULL || strval[0] == '\0') { objnum = zpool_prop_default_numeric( ZPOOL_PROP_BOOTFS); break; } if (error = dmu_objset_hold(strval, FTAG, &os)) break; /* * Must be ZPL, and its property settings * must be supported by GRUB (compression * is not gzip, and large blocks are not used). */ if (dmu_objset_type(os) != DMU_OST_ZFS) { error = SET_ERROR(ENOTSUP); } else if ((error = dsl_prop_get_int_ds(dmu_objset_ds(os), zfs_prop_to_name(ZFS_PROP_COMPRESSION), &propval)) == 0 && !BOOTFS_COMPRESS_VALID(propval)) { error = SET_ERROR(ENOTSUP); } else { objnum = dmu_objset_id(os); } dmu_objset_rele(os, FTAG); } break; case ZPOOL_PROP_FAILUREMODE: error = nvpair_value_uint64(elem, &intval); if (!error && (intval < ZIO_FAILURE_MODE_WAIT || intval > ZIO_FAILURE_MODE_PANIC)) error = SET_ERROR(EINVAL); /* * This is a special case which only occurs when * the pool has completely failed. This allows * the user to change the in-core failmode property * without syncing it out to disk (I/Os might * currently be blocked). We do this by returning * EIO to the caller (spa_prop_set) to trick it * into thinking we encountered a property validation * error. 
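 *
 * (Illustration, assuming a pool "tank" that is currently suspended:
 * "zpool set failmode=continue tank" flows through here, updates the
 * in-core spa_failmode so the new policy takes effect immediately, and
 * still returns an error to the caller so that spa_prop_set() does not
 * try to sync the change to disk.)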
*/ if (!error && spa_suspended(spa)) { spa->spa_failmode = intval; error = SET_ERROR(EIO); } break; case ZPOOL_PROP_CACHEFILE: if ((error = nvpair_value_string(elem, &strval)) != 0) break; if (strval[0] == '\0') break; if (strcmp(strval, "none") == 0) break; if (strval[0] != '/') { error = SET_ERROR(EINVAL); break; } slash = strrchr(strval, '/'); ASSERT(slash != NULL); if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || strcmp(slash, "/..") == 0) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_COMMENT: if ((error = nvpair_value_string(elem, &strval)) != 0) break; for (check = strval; *check != '\0'; check++) { /* * The kernel doesn't have an easy isprint() * check. For this kernel check, we merely * check ASCII apart from DEL. Fix this if * there is an easy-to-use kernel isprint(). */ if (*check >= 0x7f) { error = SET_ERROR(EINVAL); break; } } if (strlen(strval) > ZPROP_MAX_COMMENT) error = E2BIG; break; case ZPOOL_PROP_DEDUPDITTO: if (spa_version(spa) < SPA_VERSION_DEDUP) error = SET_ERROR(ENOTSUP); else error = nvpair_value_uint64(elem, &intval); if (error == 0 && intval != 0 && intval < ZIO_DEDUPDITTO_MIN) error = SET_ERROR(EINVAL); break; } if (error) break; } if (!error && reset_bootfs) { error = nvlist_remove(props, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); if (!error) { error = nvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); } } return (error); } void spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) { char *cachefile; spa_config_dirent_t *dp; if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), &cachefile) != 0) return; dp = kmem_alloc(sizeof (spa_config_dirent_t), KM_SLEEP); if (cachefile[0] == '\0') dp->scd_path = spa_strdup(spa_config_path); else if (strcmp(cachefile, "none") == 0) dp->scd_path = NULL; else dp->scd_path = spa_strdup(cachefile); list_insert_head(&spa->spa_config_list, dp); if (need_sync) spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); } int spa_prop_set(spa_t *spa, nvlist_t *nvp) { int error; nvpair_t *elem = NULL; boolean_t need_sync = B_FALSE; if ((error = spa_prop_validate(spa, nvp)) != 0) return (error); while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT || prop == ZPOOL_PROP_READONLY) continue; if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) { uint64_t ver; if (prop == ZPOOL_PROP_VERSION) { VERIFY(nvpair_value_uint64(elem, &ver) == 0); } else { ASSERT(zpool_prop_feature(nvpair_name(elem))); ver = SPA_VERSION_FEATURES; need_sync = B_TRUE; } /* Save time if the version is already set. */ if (ver == spa_version(spa)) continue; /* * In addition to the pool directory object, we might * create the pool properties object, the features for * read object, the features for write object, or the * feature descriptions object. */ error = dsl_sync_task(spa->spa_name, NULL, spa_sync_version, &ver, 6, ZFS_SPACE_CHECK_RESERVED); if (error) return (error); continue; } need_sync = B_TRUE; break; } if (need_sync) { return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, nvp, 6, ZFS_SPACE_CHECK_RESERVED)); } return (0); } /* * If the bootfs property value is dsobj, clear it. 
*/ void spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) { if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { VERIFY(zap_remove(spa->spa_meta_objset, spa->spa_pool_props_object, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); spa->spa_bootfs = 0; } } /*ARGSUSED*/ static int spa_change_guid_check(void *arg, dmu_tx_t *tx) { uint64_t *newguid = arg; spa_t *spa = dmu_tx_pool(tx)->dp_spa; vdev_t *rvd = spa->spa_root_vdev; uint64_t vdev_state; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); vdev_state = rvd->vdev_state; spa_config_exit(spa, SCL_STATE, FTAG); if (vdev_state != VDEV_STATE_HEALTHY) return (SET_ERROR(ENXIO)); ASSERT3U(spa_guid(spa), !=, *newguid); return (0); } static void spa_change_guid_sync(void *arg, dmu_tx_t *tx) { uint64_t *newguid = arg; spa_t *spa = dmu_tx_pool(tx)->dp_spa; uint64_t oldguid; vdev_t *rvd = spa->spa_root_vdev; oldguid = spa_guid(spa); spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); rvd->vdev_guid = *newguid; rvd->vdev_guid_sum += (*newguid - oldguid); vdev_config_dirty(rvd); spa_config_exit(spa, SCL_STATE, FTAG); spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", oldguid, *newguid); } /* * Change the GUID for the pool. This is done so that we can later * re-import a pool built from a clone of our own vdevs. We will modify * the root vdev's guid, our own pool guid, and then mark all of our * vdevs dirty. Note that we must make sure that all our vdevs are * online when we do this, or else any vdevs that weren't present * would be orphaned from our pool. We are also going to issue a * sysevent to update any watchers. */ int spa_change_guid(spa_t *spa) { int error; uint64_t guid; mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); guid = spa_generate_guid(NULL); error = dsl_sync_task(spa->spa_name, spa_change_guid_check, spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); if (error == 0) { spa_config_sync(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID); } mutex_exit(&spa_namespace_lock); mutex_exit(&spa->spa_vdev_top_lock); return (error); } /* * ========================================================================== * SPA state manipulation (open/create/destroy/import/export) * ========================================================================== */ static int spa_error_entry_compare(const void *a, const void *b) { spa_error_entry_t *sa = (spa_error_entry_t *)a; spa_error_entry_t *sb = (spa_error_entry_t *)b; int ret; ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, sizeof (zbookmark_phys_t)); if (ret < 0) return (-1); else if (ret > 0) return (1); else return (0); } /* * Utility function which retrieves copies of the current logs and * re-initializes them in the process. 
*/ void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) { ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); avl_create(&spa->spa_errlist_scrub, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); } static void spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; enum zti_modes mode = ztip->zti_mode; uint_t value = ztip->zti_value; uint_t count = ztip->zti_count; spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; char name[32]; uint_t flags = 0; boolean_t batch = B_FALSE; if (mode == ZTI_MODE_NULL) { tqs->stqs_count = 0; tqs->stqs_taskq = NULL; return; } ASSERT3U(count, >, 0); tqs->stqs_count = count; tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); switch (mode) { case ZTI_MODE_FIXED: ASSERT3U(value, >=, 1); value = MAX(value, 1); break; case ZTI_MODE_BATCH: batch = B_TRUE; flags |= TASKQ_THREADS_CPU_PCT; value = zio_taskq_batch_pct; break; default: panic("unrecognized mode for %s_%s taskq (%u:%u) in " "spa_activate()", zio_type_name[t], zio_taskq_types[q], mode, value); break; } for (uint_t i = 0; i < count; i++) { taskq_t *tq; if (count > 1) { (void) snprintf(name, sizeof (name), "%s_%s_%u", zio_type_name[t], zio_taskq_types[q], i); } else { (void) snprintf(name, sizeof (name), "%s_%s", zio_type_name[t], zio_taskq_types[q]); } #ifdef SYSDC if (zio_taskq_sysdc && spa->spa_proc != &p0) { if (batch) flags |= TASKQ_DC_BATCH; tq = taskq_create_sysdc(name, value, 50, INT_MAX, spa->spa_proc, zio_taskq_basedc, flags); } else { #endif pri_t pri = maxclsyspri; /* * The write issue taskq can be extremely CPU * intensive. Run it at slightly lower priority * than the other taskqs. */ if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) pri++; tq = taskq_create_proc(name, value, pri, 50, INT_MAX, spa->spa_proc, flags); #ifdef SYSDC } #endif tqs->stqs_taskq[i] = tq; } } static void spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; if (tqs->stqs_taskq == NULL) { ASSERT0(tqs->stqs_count); return; } for (uint_t i = 0; i < tqs->stqs_count; i++) { ASSERT3P(tqs->stqs_taskq[i], !=, NULL); taskq_destroy(tqs->stqs_taskq[i]); } kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); tqs->stqs_taskq = NULL; } /* * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. * Note that a type may have multiple discrete taskqs to avoid lock contention * on the taskq itself. In that case we choose which taskq at random by using * the low bits of gethrtime(). 
*/ void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) { spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; taskq_t *tq; ASSERT3P(tqs->stqs_taskq, !=, NULL); ASSERT3U(tqs->stqs_count, !=, 0); if (tqs->stqs_count == 1) { tq = tqs->stqs_taskq[0]; } else { #ifdef _KERNEL tq = tqs->stqs_taskq[cpu_ticks() % tqs->stqs_count]; #else tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count]; #endif } taskq_dispatch_ent(tq, func, arg, flags, ent); } static void spa_create_zio_taskqs(spa_t *spa) { for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { spa_taskqs_init(spa, t, q); } } } #ifdef _KERNEL #ifdef SPA_PROCESS static void spa_thread(void *arg) { callb_cpr_t cprinfo; spa_t *spa = arg; user_t *pu = PTOU(curproc); CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, spa->spa_name); ASSERT(curproc != &p0); (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), "zpool-%s", spa->spa_name); (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); #ifdef PSRSET_BIND /* bind this thread to the requested psrset */ if (zio_taskq_psrset_bind != PS_NONE) { pool_lock(); mutex_enter(&cpu_lock); mutex_enter(&pidlock); mutex_enter(&curproc->p_lock); if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, 0, NULL, NULL) == 0) { curthread->t_bind_pset = zio_taskq_psrset_bind; } else { cmn_err(CE_WARN, "Couldn't bind process for zfs pool \"%s\" to " "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); } mutex_exit(&curproc->p_lock); mutex_exit(&pidlock); mutex_exit(&cpu_lock); pool_unlock(); } #endif #ifdef SYSDC if (zio_taskq_sysdc) { sysdc_thread_enter(curthread, 100, 0); } #endif spa->spa_proc = curproc; spa->spa_did = curthread->t_did; spa_create_zio_taskqs(spa); mutex_enter(&spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); spa->spa_proc_state = SPA_PROC_ACTIVE; cv_broadcast(&spa->spa_proc_cv); CALLB_CPR_SAFE_BEGIN(&cprinfo); while (spa->spa_proc_state == SPA_PROC_ACTIVE) cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); spa->spa_proc_state = SPA_PROC_GONE; spa->spa_proc = &p0; cv_broadcast(&spa->spa_proc_cv); CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ mutex_enter(&curproc->p_lock); lwp_exit(); } #endif /* SPA_PROCESS */ #endif /* * Activate an uninitialized pool. */ static void spa_activate(spa_t *spa, int mode) { ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); /* Try to create a covering process */ mutex_enter(&spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_NONE); ASSERT(spa->spa_proc == &p0); spa->spa_did = 0; #ifdef SPA_PROCESS /* Only create a process if we're going to be around a while. 
*/ if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, NULL, 0) == 0) { spa->spa_proc_state = SPA_PROC_CREATED; while (spa->spa_proc_state == SPA_PROC_CREATED) { cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); } ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); ASSERT(spa->spa_proc != &p0); ASSERT(spa->spa_did != 0); } else { #ifdef _KERNEL cmn_err(CE_WARN, "Couldn't create process for zfs pool \"%s\"\n", spa->spa_name); #endif } } #endif /* SPA_PROCESS */ mutex_exit(&spa->spa_proc_lock); /* If we didn't create a process, we need to create our taskqs. */ ASSERT(spa->spa_proc == &p0); if (spa->spa_proc == &p0) { spa_create_zio_taskqs(spa); } /* * Start TRIM thread. */ trim_thread_create(spa); list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), offsetof(vdev_t, vdev_config_dirty_node)); list_create(&spa->spa_evicting_os_list, sizeof (objset_t), offsetof(objset_t, os_evicting_node)); list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), offsetof(vdev_t, vdev_state_dirty_node)); txg_list_create(&spa->spa_vdev_txg_list, offsetof(struct vdev, vdev_txg_node)); avl_create(&spa->spa_errlist_scrub, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); } /* * Opposite of spa_activate(). */ static void spa_deactivate(spa_t *spa) { ASSERT(spa->spa_sync_on == B_FALSE); ASSERT(spa->spa_dsl_pool == NULL); ASSERT(spa->spa_root_vdev == NULL); ASSERT(spa->spa_async_zio_root == NULL); ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); /* * Stop TRIM thread in case spa_unload() wasn't called directly * before spa_deactivate(). */ trim_thread_destroy(spa); spa_evicting_os_wait(spa); txg_list_destroy(&spa->spa_vdev_txg_list); list_destroy(&spa->spa_config_dirty_list); list_destroy(&spa->spa_evicting_os_list); list_destroy(&spa->spa_state_dirty_list); for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { spa_taskqs_fini(spa, t, q); } } metaslab_class_destroy(spa->spa_normal_class); spa->spa_normal_class = NULL; metaslab_class_destroy(spa->spa_log_class); spa->spa_log_class = NULL; /* * If this was part of an import or the open otherwise failed, we may * still have errors left in the queues. Empty them just in case. */ spa_errlog_drain(spa); avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_last); spa->spa_state = POOL_STATE_UNINITIALIZED; mutex_enter(&spa->spa_proc_lock); if (spa->spa_proc_state != SPA_PROC_NONE) { ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); spa->spa_proc_state = SPA_PROC_DEACTIVATE; cv_broadcast(&spa->spa_proc_cv); while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { ASSERT(spa->spa_proc != &p0); cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); } ASSERT(spa->spa_proc_state == SPA_PROC_GONE); spa->spa_proc_state = SPA_PROC_NONE; } ASSERT(spa->spa_proc == &p0); mutex_exit(&spa->spa_proc_lock); #ifdef SPA_PROCESS /* * We want to make sure spa_thread() has actually exited the ZFS * module, so that the module can't be unloaded out from underneath * it. */ if (spa->spa_did != 0) { thread_join(spa->spa_did); spa->spa_did = 0; } #endif /* SPA_PROCESS */ } /* * Verify a pool configuration, and construct the vdev tree appropriately. This * will create all the necessary vdevs in the appropriate layout, with each vdev * in the CLOSED state. This will prep the pool before open/creation/import. 
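 *
 * For illustration, the nvlist given to spa_config_parse() mirrors the
 * intended vdev tree; a two-way mirror would look roughly like this
 * (a sketch; device paths are made up):
 *
 *    vdev_tree: type "root"
 *        children[0]: type "mirror"
 *            children[0]: type "disk", path "/dev/da0"
 *            children[1]: type "disk", path "/dev/da1"
 *
 * The function recurses over each ZPOOL_CONFIG_CHILDREN array to build
 * the matching vdev_t hierarchy.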
* All vdev validation is done by the vdev_alloc() routine. */ static int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, int atype) { nvlist_t **child; uint_t children; int error; if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) return (error); if ((*vdp)->vdev_ops->vdev_op_leaf) return (0); error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children); if (error == ENOENT) return (0); if (error) { vdev_free(*vdp); *vdp = NULL; return (SET_ERROR(EINVAL)); } for (int c = 0; c < children; c++) { vdev_t *vd; if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, atype)) != 0) { vdev_free(*vdp); *vdp = NULL; return (error); } } ASSERT(*vdp != NULL); return (0); } /* * Opposite of spa_load(). */ static void spa_unload(spa_t *spa) { int i; ASSERT(MUTEX_HELD(&spa_namespace_lock)); /* * Stop TRIM thread. */ trim_thread_destroy(spa); /* * Stop async tasks. */ spa_async_suspend(spa); /* * Stop syncing. */ if (spa->spa_sync_on) { txg_sync_stop(spa->spa_dsl_pool); spa->spa_sync_on = B_FALSE; } /* * Wait for any outstanding async I/O to complete. */ if (spa->spa_async_zio_root != NULL) { for (int i = 0; i < max_ncpus; i++) (void) zio_wait(spa->spa_async_zio_root[i]); kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); spa->spa_async_zio_root = NULL; } bpobj_close(&spa->spa_deferred_bpobj); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * Close all vdevs. */ if (spa->spa_root_vdev) vdev_free(spa->spa_root_vdev); ASSERT(spa->spa_root_vdev == NULL); /* * Close the dsl pool. */ if (spa->spa_dsl_pool) { dsl_pool_close(spa->spa_dsl_pool); spa->spa_dsl_pool = NULL; spa->spa_meta_objset = NULL; } ddt_unload(spa); /* * Drop and purge level 2 cache */ spa_l2cache_drop(spa); for (i = 0; i < spa->spa_spares.sav_count; i++) vdev_free(spa->spa_spares.sav_vdevs[i]); if (spa->spa_spares.sav_vdevs) { kmem_free(spa->spa_spares.sav_vdevs, spa->spa_spares.sav_count * sizeof (void *)); spa->spa_spares.sav_vdevs = NULL; } if (spa->spa_spares.sav_config) { nvlist_free(spa->spa_spares.sav_config); spa->spa_spares.sav_config = NULL; } spa->spa_spares.sav_count = 0; for (i = 0; i < spa->spa_l2cache.sav_count; i++) { vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); vdev_free(spa->spa_l2cache.sav_vdevs[i]); } if (spa->spa_l2cache.sav_vdevs) { kmem_free(spa->spa_l2cache.sav_vdevs, spa->spa_l2cache.sav_count * sizeof (void *)); spa->spa_l2cache.sav_vdevs = NULL; } if (spa->spa_l2cache.sav_config) { nvlist_free(spa->spa_l2cache.sav_config); spa->spa_l2cache.sav_config = NULL; } spa->spa_l2cache.sav_count = 0; spa->spa_async_suspended = 0; if (spa->spa_comment != NULL) { spa_strfree(spa->spa_comment); spa->spa_comment = NULL; } spa_config_exit(spa, SCL_ALL, FTAG); } /* * Load (or re-load) the current list of vdevs describing the active spares for * this pool. When this is called, we have some form of basic information in * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and * then re-generate a more complete list including status information. */ static void spa_load_spares(spa_t *spa) { nvlist_t **spares; uint_t nspares; int i; vdev_t *vd, *tvd; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); /* * First, close and free any existing spare vdevs. 
*/ for (i = 0; i < spa->spa_spares.sav_count; i++) { vd = spa->spa_spares.sav_vdevs[i]; /* Undo the call to spa_activate() below */ if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, B_FALSE)) != NULL && tvd->vdev_isspare) spa_spare_remove(tvd); vdev_close(vd); vdev_free(vd); } if (spa->spa_spares.sav_vdevs) kmem_free(spa->spa_spares.sav_vdevs, spa->spa_spares.sav_count * sizeof (void *)); if (spa->spa_spares.sav_config == NULL) nspares = 0; else VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); spa->spa_spares.sav_count = (int)nspares; spa->spa_spares.sav_vdevs = NULL; if (nspares == 0) return; /* * Construct the array of vdevs, opening them to get status in the * process. For each spare, there are potentially two different vdev_t * structures associated with it: one in the list of spares (used only * for basic validation purposes) and one in the active vdev * configuration (if it's spared in). During this phase we open and * validate each vdev on the spare list. If the vdev also exists in the * active configuration, then we also mark this vdev as an active spare. */ spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) { VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, VDEV_ALLOC_SPARE) == 0); ASSERT(vd != NULL); spa->spa_spares.sav_vdevs[i] = vd; if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, B_FALSE)) != NULL) { if (!tvd->vdev_isspare) spa_spare_add(tvd); /* * We only mark the spare active if we were successfully * able to load the vdev. Otherwise, importing a pool * with a bad active spare would result in strange * behavior, because multiple pools would think the spare * is actively in use. * * There is a vulnerability here to an equally bizarre * circumstance, where a dead active spare is later * brought back to life (onlined or otherwise). Given * the rarity of this scenario, and the extra complexity * it adds, we ignore the possibility. */ if (!vdev_is_dead(tvd)) spa_spare_activate(tvd); } vd->vdev_top = vd; vd->vdev_aux = &spa->spa_spares; if (vdev_open(vd) != 0) continue; if (vdev_validate_aux(vd) == 0) spa_spare_add(vd); } /* * Recompute the stashed list of spares, with status information * this time. */ VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) spares[i] = vdev_config_generate(spa, spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); for (i = 0; i < spa->spa_spares.sav_count; i++) nvlist_free(spares[i]); kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); } /* * Load (or re-load) the current list of vdevs describing the active l2cache for * this pool. When this is called, we have some form of basic information in * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and * then re-generate a more complete list including status information. * Devices which are already active have their details maintained, and are * not re-opened.
*/ static void spa_load_l2cache(spa_t *spa) { nvlist_t **l2cache; uint_t nl2cache; int i, j, oldnvdevs; uint64_t guid; vdev_t *vd, **oldvdevs, **newvdevs; spa_aux_vdev_t *sav = &spa->spa_l2cache; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); if (sav->sav_config != NULL) { VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); } else { nl2cache = 0; newvdevs = NULL; } oldvdevs = sav->sav_vdevs; oldnvdevs = sav->sav_count; sav->sav_vdevs = NULL; sav->sav_count = 0; /* * Process new nvlist of vdevs. */ for (i = 0; i < nl2cache; i++) { VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, &guid) == 0); newvdevs[i] = NULL; for (j = 0; j < oldnvdevs; j++) { vd = oldvdevs[j]; if (vd != NULL && guid == vd->vdev_guid) { /* * Retain previous vdev for add/remove ops. */ newvdevs[i] = vd; oldvdevs[j] = NULL; break; } } if (newvdevs[i] == NULL) { /* * Create new vdev */ VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, VDEV_ALLOC_L2CACHE) == 0); ASSERT(vd != NULL); newvdevs[i] = vd; /* * Commit this vdev as an l2cache device, * even if it fails to open. */ spa_l2cache_add(vd); vd->vdev_top = vd; vd->vdev_aux = sav; spa_l2cache_activate(vd); if (vdev_open(vd) != 0) continue; (void) vdev_validate_aux(vd); if (!vdev_is_dead(vd)) l2arc_add_vdev(spa, vd); } } /* * Purge vdevs that were dropped */ for (i = 0; i < oldnvdevs; i++) { uint64_t pool; vd = oldvdevs[i]; if (vd != NULL) { ASSERT(vd->vdev_isl2cache); if (spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL && l2arc_vdev_present(vd)) l2arc_remove_vdev(vd); vdev_clear_stats(vd); vdev_free(vd); } } if (oldvdevs) kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); if (sav->sav_config == NULL) goto out; sav->sav_vdevs = newvdevs; sav->sav_count = (int)nl2cache; /* * Recompute the stashed list of l2cache devices, with status * information this time. */ VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < sav->sav_count; i++) l2cache[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); VERIFY(nvlist_add_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); out: for (i = 0; i < sav->sav_count; i++) nvlist_free(l2cache[i]); if (sav->sav_count) kmem_free(l2cache, sav->sav_count * sizeof (void *)); } static int load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) { dmu_buf_t *db; char *packed = NULL; size_t nvsize = 0; int error; *value = NULL; error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db); if (error != 0) return (error); nvsize = *(uint64_t *)db->db_data; dmu_buf_rele(db, FTAG); packed = kmem_alloc(nvsize, KM_SLEEP); error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, DMU_READ_PREFETCH); if (error == 0) error = nvlist_unpack(packed, nvsize, value, 0); kmem_free(packed, nvsize); return (error); } /* * Checks to see if the given vdev could not be opened, in which case we post a * sysevent to notify the autoreplace code that the device has been removed. 
*/ static void spa_check_removed(vdev_t *vd) { for (int c = 0; c < vd->vdev_children; c++) spa_check_removed(vd->vdev_child[c]); if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && !vd->vdev_ishole) { zfs_post_autoreplace(vd->vdev_spa, vd); spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); } } static void spa_config_valid_zaps(vdev_t *vd, vdev_t *mvd) { ASSERT3U(vd->vdev_children, ==, mvd->vdev_children); vd->vdev_top_zap = mvd->vdev_top_zap; vd->vdev_leaf_zap = mvd->vdev_leaf_zap; for (uint64_t i = 0; i < vd->vdev_children; i++) { spa_config_valid_zaps(vd->vdev_child[i], mvd->vdev_child[i]); } } /* * Validate the current config against the MOS config */ static boolean_t spa_config_valid(spa_t *spa, nvlist_t *config) { vdev_t *mrvd, *rvd = spa->spa_root_vdev; nvlist_t *nv; VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children); /* * If we're doing a normal import, then build up any additional * diagnostic information about missing devices in this config. * We'll pass this up to the user for further processing. */ if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { nvlist_t **child, *nv; uint64_t idx = 0; child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **), KM_SLEEP); VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; vdev_t *mtvd = mrvd->vdev_child[c]; if (tvd->vdev_ops == &vdev_missing_ops && mtvd->vdev_ops != &vdev_missing_ops && mtvd->vdev_islog) child[idx++] = vdev_config_generate(spa, mtvd, B_FALSE, 0); } if (idx) { VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, child, idx) == 0); VERIFY(nvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0); for (int i = 0; i < idx; i++) nvlist_free(child[i]); } nvlist_free(nv); kmem_free(child, rvd->vdev_children * sizeof (char **)); } /* * Compare the root vdev tree with the information we have * from the MOS config (mrvd). Check each top-level vdev * with the corresponding MOS config top-level (mtvd). */ for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; vdev_t *mtvd = mrvd->vdev_child[c]; /* * Resolve any "missing" vdevs in the current configuration. * If we find that the MOS config has more accurate information * about the top-level vdev then use that vdev instead. */ if (tvd->vdev_ops == &vdev_missing_ops && mtvd->vdev_ops != &vdev_missing_ops) { if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) continue; /* * Device specific actions. */ if (mtvd->vdev_islog) { spa_set_log_state(spa, SPA_LOG_CLEAR); } else { /* * XXX - once we have 'readonly' pool * support we should be able to handle * missing data devices by transitioning * the pool to readonly. */ continue; } /* * Swap the missing vdev with the data we were * able to obtain from the MOS config. */ vdev_remove_child(rvd, tvd); vdev_remove_child(mrvd, mtvd); vdev_add_child(rvd, mtvd); vdev_add_child(mrvd, tvd); spa_config_exit(spa, SCL_ALL, FTAG); vdev_load(mtvd); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_reopen(rvd); } else { if (mtvd->vdev_islog) { /* * Load the slog device's state from the MOS * config since it's possible that the label * does not contain the most up-to-date * information. */ vdev_load_log_state(tvd, mtvd); vdev_reopen(tvd); } /* * Per-vdev ZAP info is stored exclusively in the MOS. 
*/ spa_config_valid_zaps(tvd, mtvd); } } vdev_free(mrvd); spa_config_exit(spa, SCL_ALL, FTAG); /* * Ensure we were able to validate the config. */ return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum); } /* * Check for missing log devices */ static boolean_t spa_check_logs(spa_t *spa) { boolean_t rv = B_FALSE; dsl_pool_t *dp = spa_get_dsl(spa); switch (spa->spa_log_state) { case SPA_LOG_MISSING: /* need to recheck in case slog has been restored */ case SPA_LOG_UNKNOWN: rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj, zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0); if (rv) spa_set_log_state(spa, SPA_LOG_MISSING); break; } return (rv); } static boolean_t spa_passivate_log(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; boolean_t slog_found = B_FALSE; ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); if (!spa_has_slogs(spa)) return (B_FALSE); for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (tvd->vdev_islog) { metaslab_group_passivate(mg); slog_found = B_TRUE; } } return (slog_found); } static void spa_activate_log(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (tvd->vdev_islog) metaslab_group_activate(mg); } } int spa_offline_log(spa_t *spa) { int error; error = dmu_objset_find(spa_name(spa), zil_vdev_offline, NULL, DS_FIND_CHILDREN); if (error == 0) { /* * We successfully offlined the log device, sync out the * current txg so that the "stubby" block can be removed * by zil_sync(). */ txg_wait_synced(spa->spa_dsl_pool, 0); } return (error); } static void spa_aux_check_removed(spa_aux_vdev_t *sav) { int i; for (i = 0; i < sav->sav_count; i++) spa_check_removed(sav->sav_vdevs[i]); } void spa_claim_notify(zio_t *zio) { spa_t *spa = zio->io_spa; if (zio->io_error) return; mutex_enter(&spa->spa_props_lock); /* any mutex will do */ if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) spa->spa_claim_max_txg = zio->io_bp->blk_birth; mutex_exit(&spa->spa_props_lock); } typedef struct spa_load_error { uint64_t sle_meta_count; uint64_t sle_data_count; } spa_load_error_t; static void spa_load_verify_done(zio_t *zio) { blkptr_t *bp = zio->io_bp; spa_load_error_t *sle = zio->io_private; dmu_object_type_t type = BP_GET_TYPE(bp); int error = zio->io_error; spa_t *spa = zio->io_spa; if (error) { if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && type != DMU_OT_INTENT_LOG) atomic_inc_64(&sle->sle_meta_count); else atomic_inc_64(&sle->sle_data_count); } zio_data_buf_free(zio->io_data, zio->io_size); mutex_enter(&spa->spa_scrub_lock); spa->spa_scrub_inflight--; cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); } /* * Maximum number of concurrent scrub i/os to create while verifying * a pool while importing it. 
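 *
 * (Illustration: exported below as vfs.zfs.spa_load_verify_maxinflight;
 * e.g.
 *    sysctl vfs.zfs.spa_load_verify_maxinflight=1000
 * would throttle import-time verification to at most 1000 in-flight
 * scrub reads. 1000 is an arbitrary example value.)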
*/ int spa_load_verify_maxinflight = 10000; boolean_t spa_load_verify_metadata = B_TRUE; boolean_t spa_load_verify_data = B_TRUE; SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_maxinflight, CTLFLAG_RWTUN, &spa_load_verify_maxinflight, 0, "Maximum number of concurrent scrub I/Os to create while verifying a " "pool while importing it"); SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_metadata, CTLFLAG_RWTUN, &spa_load_verify_metadata, 0, "Check metadata on import?"); SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_data, CTLFLAG_RWTUN, &spa_load_verify_data, 0, "Check user data on import?"); /*ARGSUSED*/ static int spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); /* * Note: normally this routine will not be called if * spa_load_verify_metadata is not set. However, it may be useful * to manually set the flag after the traversal has begun. */ if (!spa_load_verify_metadata) return (0); if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data) return (0); zio_t *rio = arg; size_t size = BP_GET_PSIZE(bp); void *data = zio_data_buf_alloc(size); mutex_enter(&spa->spa_scrub_lock); while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); spa->spa_scrub_inflight++; mutex_exit(&spa->spa_scrub_lock); zio_nowait(zio_read(rio, spa, bp, data, size, spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); return (0); } /* ARGSUSED */ int verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); return (0); } static int spa_load_verify(spa_t *spa) { zio_t *rio; spa_load_error_t sle = { 0 }; zpool_rewind_policy_t policy; boolean_t verify_ok = B_FALSE; int error = 0; zpool_get_rewind_policy(spa->spa_config, &policy); if (policy.zrp_request & ZPOOL_NEVER_REWIND) return (0); dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); error = dmu_objset_find_dp(spa->spa_dsl_pool, spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, DS_FIND_CHILDREN); dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); if (error != 0) return (error); rio = zio_root(spa, NULL, &sle, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); if (spa_load_verify_metadata) { error = traverse_pool(spa, spa->spa_verify_min_txg, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, spa_load_verify_cb, rio); } (void) zio_wait(rio); spa->spa_load_meta_errors = sle.sle_meta_count; spa->spa_load_data_errors = sle.sle_data_count; if (!error && sle.sle_meta_count <= policy.zrp_maxmeta && sle.sle_data_count <= policy.zrp_maxdata) { int64_t loss = 0; verify_ok = B_TRUE; spa->spa_load_txg = spa->spa_uberblock.ub_txg; spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; VERIFY(nvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); VERIFY(nvlist_add_int64(spa->spa_load_info, ZPOOL_CONFIG_REWIND_TIME, loss) == 0); VERIFY(nvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); } else { spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; } if (error) { if (error != ENXIO && error != EIO) error = SET_ERROR(EIO); return (error); } return (verify_ok ? 0 : EIO); } /* * Find a value in the pool props object. 
*/ static void spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) { (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); } /* * Find a value in the pool directory object. */ static int spa_dir_prop(spa_t *spa, const char *name, uint64_t *val) { return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, name, sizeof (uint64_t), 1, val)); } static int spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) { vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); return (err); } /* * Fix up config after a partly-completed split. This is done with the * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off * pool have that entry in their config, but only the splitting one contains * a list of all the guids of the vdevs that are being split off. * * This function determines what to do with that list: either rejoin * all the disks to the pool, or complete the splitting process. To attempt * the rejoin, each disk that is offlined is marked online again, and * we do a reopen() call. If the vdev label for every disk that was * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) * then we call vdev_split() on each disk, and complete the split. * * Otherwise we leave the config alone, with all the vdevs in place in * the original pool. */ static void spa_try_repair(spa_t *spa, nvlist_t *config) { uint_t extracted; uint64_t *glist; uint_t i, gcount; nvlist_t *nvl; vdev_t **vd; boolean_t attempt_reopen; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) return; /* check that the config is complete */ if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, &glist, &gcount) != 0) return; vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); /* attempt to online all the vdevs & validate */ attempt_reopen = B_TRUE; for (i = 0; i < gcount; i++) { if (glist[i] == 0) /* vdev is hole */ continue; vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); if (vd[i] == NULL) { /* * Don't bother attempting to reopen the disks; * just do the split. */ attempt_reopen = B_FALSE; } else { /* attempt to re-online it */ vd[i]->vdev_offline = B_FALSE; } } if (attempt_reopen) { vdev_reopen(spa->spa_root_vdev); /* check each device to see what state it's in */ for (extracted = 0, i = 0; i < gcount; i++) { if (vd[i] != NULL && vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) break; ++extracted; } } /* * If every disk has been moved to the new pool, or if we never * even attempted to look at them, then we split them off for * good. */ if (!attempt_reopen || gcount == extracted) { for (i = 0; i < gcount; i++) if (vd[i] != NULL) vdev_split(vd[i]); vdev_reopen(spa->spa_root_vdev); } kmem_free(vd, gcount * sizeof (vdev_t *)); } static int spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig) { nvlist_t *config = spa->spa_config; char *ereport = FM_EREPORT_ZFS_POOL; char *comment; int error; uint64_t pool_guid; nvlist_t *nvl; if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) return (SET_ERROR(EINVAL)); ASSERT(spa->spa_comment == NULL); if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) spa->spa_comment = spa_strdup(comment); /* * Versioning wasn't explicitly added to the label until later, so if * it's not present treat it as the initial version. 
*/ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &spa->spa_ubsync.ub_version) != 0) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &spa->spa_config_txg); if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && spa_guid_exists(pool_guid, 0)) { error = SET_ERROR(EEXIST); } else { spa->spa_config_guid = pool_guid; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) { VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting, KM_SLEEP) == 0); } nvlist_free(spa->spa_load_info); spa->spa_load_info = fnvlist_alloc(); gethrestime(&spa->spa_loaded_ts); error = spa_load_impl(spa, pool_guid, config, state, type, mosconfig, &ereport); } /* * Don't count references from objsets that are already closed * and are making their way through the eviction process. */ spa_evicting_os_wait(spa); spa->spa_minref = refcount_count(&spa->spa_refcount); if (error) { if (error != EEXIST) { spa->spa_loaded_ts.tv_sec = 0; spa->spa_loaded_ts.tv_nsec = 0; } if (error != EBADF) { zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); } } spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; spa->spa_ena = 0; return (error); } /* * Count the number of per-vdev ZAPs associated with all of the vdevs in the * vdev tree rooted in the given vd, and ensure that each ZAP is present in the * spa's per-vdev ZAP list. */ static uint64_t vdev_count_verify_zaps(vdev_t *vd) { spa_t *spa = vd->vdev_spa; uint64_t total = 0; if (vd->vdev_top_zap != 0) { total++; ASSERT0(zap_lookup_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps, vd->vdev_top_zap)); } if (vd->vdev_leaf_zap != 0) { total++; ASSERT0(zap_lookup_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); } for (uint64_t i = 0; i < vd->vdev_children; i++) { total += vdev_count_verify_zaps(vd->vdev_child[i]); } return (total); } /* * Load an existing storage pool, using the pool's builtin spa_config as a * source of configuration information. */ static int spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, char **ereport) { int error = 0; nvlist_t *nvroot = NULL; nvlist_t *label; vdev_t *rvd; uberblock_t *ub = &spa->spa_uberblock; uint64_t children, config_cache_txg = spa->spa_config_txg; int orig_mode = spa->spa_mode; int parse; uint64_t obj; boolean_t missing_feat_write = B_FALSE; /* * If this is an untrusted config, access the pool in read-only mode. * This prevents things like resilvering recently removed devices. */ if (!mosconfig) spa->spa_mode = FREAD; ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa->spa_load_state = state; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)) return (SET_ERROR(EINVAL)); parse = (type == SPA_IMPORT_EXISTING ? VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); /* * Create "The Godfather" zio to hold all async IOs */ spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); for (int i = 0; i < max_ncpus; i++) { spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); } /* * Parse the configuration into a vdev tree. We explicitly set the * value that will be returned by spa_version() since parsing the * configuration requires knowing the version number. 
*/ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) return (error); ASSERT(spa->spa_root_vdev == rvd); ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT); ASSERT3U(spa->spa_max_ashift, <=, SPA_MAXBLOCKSHIFT); if (type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_guid(spa) == pool_guid); } /* * Try to open all vdevs, loading each label in the process. */ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = vdev_open(rvd); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) return (error); /* * We need to validate the vdev labels against the configuration that * we have in hand, which is dependent on the setting of mosconfig. If * mosconfig is true then we're validating the vdev labels based on * that config. Otherwise, we're validating against the cached config * (zpool.cache) that was read when we loaded the zfs module, and then * later we will recursively call spa_load() and validate against * the vdev config. * * If we're assembling a new pool that's been split off from an * existing pool, the labels haven't yet been updated so we skip * validation for now. */ if (type != SPA_IMPORT_ASSEMBLE) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = vdev_validate(rvd, mosconfig); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) return (error); if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) return (SET_ERROR(ENXIO)); } /* * Find the best uberblock. */ vdev_uberblock_load(rvd, ub, &label); /* * If we weren't able to find a single valid uberblock, return failure. */ if (ub->ub_txg == 0) { nvlist_free(label); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); } /* * If the pool has an unsupported version we can't open it. */ if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { nvlist_free(label); return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); } if (ub->ub_version >= SPA_VERSION_FEATURES) { nvlist_t *features; /* * If we weren't able to find what's necessary for reading the * MOS in the label, return failure. */ if (label == NULL || nvlist_lookup_nvlist(label, ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) { nvlist_free(label); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); } /* * Update our in-core representation with the definitive values * from the label. */ nvlist_free(spa->spa_label_features); VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); } nvlist_free(label); /* * Look through entries in the label nvlist's features_for_read. If * there is a feature listed there which we don't understand then we * cannot open a pool. */ if (ub->ub_version >= SPA_VERSION_FEATURES) { nvlist_t *unsup_feat; VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, NULL); nvp != NULL; nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { if (!zfeature_is_supported(nvpair_name(nvp))) { VERIFY(nvlist_add_string(unsup_feat, nvpair_name(nvp), "") == 0); } } if (!nvlist_empty(unsup_feat)) { VERIFY(nvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); nvlist_free(unsup_feat); return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); } nvlist_free(unsup_feat); } /* * If the vdev guid sum doesn't match the uberblock, we have an * incomplete configuration. We first check to see if the pool * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN). * If it is, defer the vdev_guid_sum check till later so we * can handle missing vdevs. 
*/ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE && rvd->vdev_guid_sum != ub->ub_guid_sum) return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_try_repair(spa, config); spa_config_exit(spa, SCL_ALL, FTAG); nvlist_free(spa->spa_config_splitting); spa->spa_config_splitting = NULL; } /* * Initialize internal SPA structures. */ spa->spa_state = POOL_STATE_ACTIVE; spa->spa_ubsync = spa->spa_uberblock; spa->spa_verify_min_txg = spa->spa_extreme_rewind ? TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; spa->spa_first_txg = spa->spa_last_ubsync_txg ? spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; spa->spa_claim_max_txg = spa->spa_first_txg; spa->spa_prev_software_version = ub->ub_software_version; error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); if (error) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (spa_version(spa) >= SPA_VERSION_FEATURES) { boolean_t missing_feat_read = B_FALSE; nvlist_t *unsup_feat, *enabled_feat; if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, &spa->spa_feat_for_read_obj) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, &spa->spa_feat_for_write_obj) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, &spa->spa_feat_desc_obj) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } enabled_feat = fnvlist_alloc(); unsup_feat = fnvlist_alloc(); if (!spa_features_check(spa, B_FALSE, unsup_feat, enabled_feat)) missing_feat_read = B_TRUE; if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) { if (!spa_features_check(spa, B_TRUE, unsup_feat, enabled_feat)) { missing_feat_write = B_TRUE; } } fnvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); if (!nvlist_empty(unsup_feat)) { fnvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); } fnvlist_free(enabled_feat); fnvlist_free(unsup_feat); if (!missing_feat_read) { fnvlist_add_boolean(spa->spa_load_info, ZPOOL_CONFIG_CAN_RDONLY); } /* * If the state is SPA_LOAD_TRYIMPORT, our objective is * twofold: to determine whether the pool is available for * import in read-write mode and (if it is not) whether the * pool is available for import in read-only mode. If the pool * is available for import in read-write mode, it is displayed * as available in userland; if it is not available for import * in read-only mode, it is displayed as unavailable in * userland. If the pool is available for import in read-only * mode but not read-write mode, it is displayed as unavailable * in userland with a special note that the pool is actually * available for open in read-only mode. * * As a result, if the state is SPA_LOAD_TRYIMPORT and we are * missing a feature for write, we must first determine whether * the pool can be opened read-only before returning to * userland in order to know whether to display the * abovementioned note. 
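 *
 * To summarize the cases described above (roughly):
 *
 *	missing_feat_read	missing_feat_write	shown to userland as
 *	-----------------	------------------	--------------------
 *	false			false			importable read-write
 *	false			true			importable read-only
 *	true			(either)		unavailable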
*/ if (missing_feat_read || (missing_feat_write && spa_writeable(spa))) { return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); } /* * Load refcounts for ZFS features from disk into an in-memory * cache during SPA initialization. */ for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { uint64_t refcount; error = feature_get_refcount_from_disk(spa, &spa_feature_table[i], &refcount); if (error == 0) { spa->spa_feat_refcount_cache[i] = refcount; } else if (error == ENOTSUP) { spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED; } else { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } } } if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG, &spa->spa_feat_enabled_txg_obj) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } spa->spa_is_initializing = B_TRUE; error = dsl_pool_open(spa->spa_dsl_pool); spa->spa_is_initializing = B_FALSE; if (error != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (!mosconfig) { uint64_t hostid; nvlist_t *policy = NULL, *nvconfig; if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, ZPOOL_CONFIG_HOSTID, &hostid) == 0) { char *hostname; unsigned long myhostid = 0; VERIFY(nvlist_lookup_string(nvconfig, ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); #ifdef _KERNEL myhostid = zone_get_hostid(NULL); #else /* _KERNEL */ /* * We're emulating the system's hostid in userland, so * we can't use zone_get_hostid(). */ (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); #endif /* _KERNEL */ if (check_hostid && hostid != 0 && myhostid != 0 && hostid != myhostid) { nvlist_free(nvconfig); cmn_err(CE_WARN, "pool '%s' could not be " "loaded as it was last accessed by " "another system (host: %s hostid: 0x%lx). " "See: http://illumos.org/msg/ZFS-8000-EY", spa_name(spa), hostname, (unsigned long)hostid); return (SET_ERROR(EBADF)); } } if (nvlist_lookup_nvlist(spa->spa_config, ZPOOL_REWIND_POLICY, &policy) == 0) VERIFY(nvlist_add_nvlist(nvconfig, ZPOOL_REWIND_POLICY, policy) == 0); spa_config_set(spa, nvconfig); spa_unload(spa); spa_deactivate(spa); spa_activate(spa, orig_mode); return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE)); } /* Grab the secret checksum salt from the MOS. */ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, sizeof (spa->spa_cksum_salt.zcs_bytes), spa->spa_cksum_salt.zcs_bytes); if (error == ENOENT) { /* Generate a new salt for subsequent use */ (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, sizeof (spa->spa_cksum_salt.zcs_bytes)); } else if (error != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); if (error != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the bit that tells us to use the new accounting function * (raid-z deflation). If we have an older pool, this will not * be present. */ error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, &spa->spa_creation_version); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the persistent error log. 
If we have an older pool, this will * not be present. */ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, &spa->spa_errlog_scrub); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the history object. If we have an older pool, this * will not be present. */ error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the per-vdev ZAP map. If we have an older pool, this will not * be present; in this case, defer its creation to a later time to * avoid dirtying the MOS this early / out of sync context. See * spa_sync_config_object. */ /* The sentinel is only available in the MOS config. */ nvlist_t *mos_config; if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP, &spa->spa_all_vdev_zaps); if (error != ENOENT && error != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } else if (error == 0 && !nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) { /* * An older version of ZFS overwrote the sentinel value, so * we have orphaned per-vdev ZAPs in the MOS. Defer their * destruction to later; see spa_sync_config_object. */ spa->spa_avz_action = AVZ_ACTION_DESTROY; /* * We're assuming that no vdevs have had their ZAPs created * before this. Better be sure of it. */ ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); } nvlist_free(mos_config); /* * If we're assembling the pool from the split-off vdevs of * an existing pool, we don't want to attach the spares & cache * devices. */ /* * Load any hot spares for this pool. */ error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); if (load_nvlist(spa, spa->spa_spares.sav_object, &spa->spa_spares.sav_config) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); } else if (error == 0) { spa->spa_spares.sav_sync = B_TRUE; } /* * Load any level 2 ARC devices for this pool. 
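 *
 * Like the spares above, this follows the usual pattern for optional
 * MOS directory entries, where ENOENT merely means the pool predates
 * the feature.  A sketch of the pattern (illustrative only):
 *
 *	error = spa_dir_prop(spa, DMU_POOL_L2CACHE, &sav_object);
 *	if (error != 0 && error != ENOENT)
 *		return (corrupt-data error);
 *	if (error == 0)
 *		load the nvlist and attach the devices;
 *	else
 *		nothing to do -- older pool;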
*/ error = spa_dir_prop(spa, DMU_POOL_L2CACHE, &spa->spa_l2cache.sav_object); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); if (load_nvlist(spa, spa->spa_l2cache.sav_object, &spa->spa_l2cache.sav_config) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); } else if (error == 0) { spa->spa_l2cache.sav_sync = B_TRUE; } spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object); if (error && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0) { uint64_t autoreplace; spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, &spa->spa_dedup_ditto); spa->spa_autoreplace = (autoreplace != 0); } /* * If the 'autoreplace' property is set, then post a resource notifying * the ZFS DE that it should not issue any faults for unopenable * devices. We also iterate over the vdevs, and post a sysevent for any * unopenable vdevs so that the normal autoreplace handler can take * over. */ if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { spa_check_removed(spa->spa_root_vdev); /* * For the import case, this is done in spa_import(), because * at this point we're using the spare definitions from * the MOS config, not necessarily from the userland config. */ if (state != SPA_LOAD_IMPORT) { spa_aux_check_removed(&spa->spa_spares); spa_aux_check_removed(&spa->spa_l2cache); } } /* * Load the vdev state for all toplevel vdevs. */ vdev_load(rvd); /* * Propagate the leaf DTLs we just loaded all the way up the tree. */ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_dtl_reassess(rvd, 0, 0, B_FALSE); spa_config_exit(spa, SCL_ALL, FTAG); /* * Load the DDTs (dedup tables). */ error = ddt_load(spa); if (error != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa_update_dspace(spa); /* * Validate the config, using the MOS config to fill in any * information which might be missing. If we fail to validate * the config then declare the pool unfit for use. If we're * assembling a pool from a split, the log is not transferred * over. */ if (type != SPA_IMPORT_ASSEMBLE) { nvlist_t *nvconfig; if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (!spa_config_valid(spa, nvconfig)) { nvlist_free(nvconfig); return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); } nvlist_free(nvconfig); /* * Now that we've validated the config, check the state of the * root vdev. If it can't be opened, it indicates one or * more toplevel vdevs are faulted. */ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) return (SET_ERROR(ENXIO)); if (spa_writeable(spa) && spa_check_logs(spa)) { *ereport = FM_EREPORT_ZFS_LOG_REPLAY; return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO)); } } if (missing_feat_write) { ASSERT(state == SPA_LOAD_TRYIMPORT); /* * At this point, we know that we can open the pool in * read-only mode but not read-write mode. We now have enough * information and can return to userland. 
	 */
		return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
	}

	/*
	 * We've successfully opened the pool, verify that we're ready
	 * to start pushing transactions.
	 */
	if (state != SPA_LOAD_TRYIMPORT) {
		if ((error = spa_load_verify(spa)) != 0)
			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
			    error));
	}

	if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
	    spa->spa_load_max_txg == UINT64_MAX)) {
		dmu_tx_t *tx;
		int need_update = B_FALSE;
		dsl_pool_t *dp = spa_get_dsl(spa);

		ASSERT(state != SPA_LOAD_TRYIMPORT);

		/*
		 * Claim log blocks that haven't been committed yet.
		 * This must all happen in a single txg.
		 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
		 * invoked from zil_claim_log_block()'s i/o done callback.
		 * Price of rollback is that we abandon the log.
		 */
		spa->spa_claiming = B_TRUE;

		tx = dmu_tx_create_assigned(dp, spa_first_txg(spa));
		(void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
		    zil_claim, tx, DS_FIND_CHILDREN);
		dmu_tx_commit(tx);

		spa->spa_claiming = B_FALSE;

		spa_set_log_state(spa, SPA_LOG_GOOD);
		spa->spa_sync_on = B_TRUE;
		txg_sync_start(spa->spa_dsl_pool);

		/*
		 * Wait for all claims to sync.  We sync up to the highest
		 * claimed log block birth time so that claimed log blocks
		 * don't appear to be from the future.  spa_claim_max_txg
		 * will have been set for us by either zil_check_log_chain()
		 * (invoked from spa_check_logs()) or zil_claim() above.
		 */
		txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);

		/*
		 * If the config cache is stale, or we have uninitialized
		 * metaslabs (see spa_vdev_add()), then update the config.
		 *
		 * If this is a verbatim import, trust the current
		 * in-core spa_config and update the disk labels.
		 */
		if (config_cache_txg != spa->spa_config_txg ||
		    state == SPA_LOAD_IMPORT ||
		    state == SPA_LOAD_RECOVER ||
		    (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
			need_update = B_TRUE;

		for (int c = 0; c < rvd->vdev_children; c++)
			if (rvd->vdev_child[c]->vdev_ms_array == 0)
				need_update = B_TRUE;

		/*
		 * Update the config cache asynchronously in case we're the
		 * root pool, in which case the config cache isn't writable
		 * yet.
		 */
		if (need_update)
			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		/*
		 * Check all DTLs to see if anything needs resilvering.
		 */
		if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
		    vdev_resilver_needed(rvd, NULL, NULL))
			spa_async_request(spa, SPA_ASYNC_RESILVER);

		/*
		 * Log the fact that we booted up (so that we can detect if
		 * we rebooted in the middle of an operation).
		 */
		spa_history_log_version(spa, "open");

		/*
		 * Delete any inconsistent datasets.
		 */
		(void) dmu_objset_find(spa_name(spa),
		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);

		/*
		 * Clean up any stale temporary dataset userrefs.
		 */
		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
	}

	return (0);
}

static int
spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
{
	int mode = spa->spa_mode;

	spa_unload(spa);
	spa_deactivate(spa);

	spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;

	spa_activate(spa, mode);
	spa_async_suspend(spa);

	return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
}

/*
 * If spa_load() fails, this function will try loading prior txgs. If
 * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool
 * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this
 * function will not rewind the pool and will return the same error as
 * spa_load().
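 *
 * The behavior is steered by 'rewind_flags'; briefly (a summary of the
 * logic below, not an exhaustive contract):
 *
 *	ZPOOL_NEVER_REWIND	give up immediately with spa_load()'s error
 *	ZPOOL_EXTREME_REWIND	permit rewinding all the way back to
 *				TXG_INITIAL instead of only TXG_DEFER_SIZE
 *				txgs
 *
 * (Callers request rewind via ZPOOL_DO_REWIND in the rewind policy,
 * which makes 'state' SPA_LOAD_RECOVER.)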
*/ static int spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, uint64_t max_request, int rewind_flags) { nvlist_t *loadinfo = NULL; nvlist_t *config = NULL; int load_error, rewind_error; uint64_t safe_rewind_txg; uint64_t min_txg; if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { spa->spa_load_max_txg = spa->spa_load_txg; spa_set_log_state(spa, SPA_LOG_CLEAR); } else { spa->spa_load_max_txg = max_request; if (max_request != UINT64_MAX) spa->spa_extreme_rewind = B_TRUE; } load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig); if (load_error == 0) return (0); if (spa->spa_root_vdev != NULL) config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; if (rewind_flags & ZPOOL_NEVER_REWIND) { nvlist_free(config); return (load_error); } if (state == SPA_LOAD_RECOVER) { /* Price of rolling back is discarding txgs, including log */ spa_set_log_state(spa, SPA_LOG_CLEAR); } else { /* * If we aren't rolling back save the load info from our first * import attempt so that we can restore it after attempting * to rewind. */ loadinfo = spa->spa_load_info; spa->spa_load_info = fnvlist_alloc(); } spa->spa_load_max_txg = spa->spa_last_ubsync_txg; safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? TXG_INITIAL : safe_rewind_txg; /* * Continue as long as we're finding errors, we're still within * the acceptable rewind range, and we're still finding uberblocks */ while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { if (spa->spa_load_max_txg < safe_rewind_txg) spa->spa_extreme_rewind = B_TRUE; rewind_error = spa_load_retry(spa, state, mosconfig); } spa->spa_extreme_rewind = B_FALSE; spa->spa_load_max_txg = UINT64_MAX; if (config && (rewind_error || state != SPA_LOAD_RECOVER)) spa_config_set(spa, config); if (state == SPA_LOAD_RECOVER) { ASSERT3P(loadinfo, ==, NULL); return (rewind_error); } else { /* Store the rewind info as part of the initial load info */ fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, spa->spa_load_info); /* Restore the initial load info */ fnvlist_free(spa->spa_load_info); spa->spa_load_info = loadinfo; return (load_error); } } /* * Pool Open/Import * * The import case is identical to an open except that the configuration is sent * down from userland, instead of grabbed from the configuration cache. For the * case of an open, the pool configuration will exist in the * POOL_STATE_UNINITIALIZED state. * * The stats information (gen/count/ustats) is used to gather vdev statistics at * the same time open the pool, without having to keep around the spa_t in some * ambiguous state. */ static int spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, nvlist_t **config) { spa_t *spa; spa_load_state_t state = SPA_LOAD_OPEN; int error; int locked = B_FALSE; int firstopen = B_FALSE; *spapp = NULL; /* * As disgusting as this is, we need to support recursive calls to this * function because dsl_dir_open() is called during spa_load(), and ends * up calling spa_open() again. The real fix is to figure out how to * avoid dsl_dir_open() calling this in the first place. 
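 *
 * For reference, a typical external caller pairs spa_open() with
 * spa_close(); an illustrative sketch (pool name hypothetical):
 *
 *	spa_t *spa;
 *	int err;
 *
 *	if ((err = spa_open("tank", &spa, FTAG)) == 0) {
 *		... use the pool ...
 *		spa_close(spa, FTAG);
 *	}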
*/ if (mutex_owner(&spa_namespace_lock) != curthread) { mutex_enter(&spa_namespace_lock); locked = B_TRUE; } if ((spa = spa_lookup(pool)) == NULL) { if (locked) mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } if (spa->spa_state == POOL_STATE_UNINITIALIZED) { zpool_rewind_policy_t policy; firstopen = B_TRUE; zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config, &policy); if (policy.zrp_request & ZPOOL_DO_REWIND) state = SPA_LOAD_RECOVER; spa_activate(spa, spa_mode_global); if (state != SPA_LOAD_RECOVER) spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, policy.zrp_request); if (error == EBADF) { /* * If vdev_validate() returns failure (indicated by * EBADF), it indicates that one of the vdevs indicates * that the pool has been exported or destroyed. If * this is the case, the config cache is out of sync and * we should remove the pool from the namespace. */ spa_unload(spa); spa_deactivate(spa); spa_config_sync(spa, B_TRUE, B_TRUE); spa_remove(spa); if (locked) mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } if (error) { /* * We can't open the pool, but we still have useful * information: the state of each vdev after the * attempted vdev_open(). Return this to the user. */ if (config != NULL && spa->spa_config) { VERIFY(nvlist_dup(spa->spa_config, config, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); } spa_unload(spa); spa_deactivate(spa); spa->spa_last_open_failed = error; if (locked) mutex_exit(&spa_namespace_lock); *spapp = NULL; return (error); } } spa_open_ref(spa, tag); if (config != NULL) *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); /* * If we've recovered the pool, pass back any information we * gathered while doing the load. */ if (state == SPA_LOAD_RECOVER) { VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); } if (locked) { spa->spa_last_open_failed = 0; spa->spa_last_ubsync_txg = 0; spa->spa_load_txg = 0; mutex_exit(&spa_namespace_lock); #ifdef __FreeBSD__ #ifdef _KERNEL if (firstopen) zvol_create_minors(spa->spa_name); #endif #endif } *spapp = spa; return (0); } int spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, nvlist_t **config) { return (spa_open_common(name, spapp, tag, policy, config)); } int spa_open(const char *name, spa_t **spapp, void *tag) { return (spa_open_common(name, spapp, tag, NULL, NULL)); } /* * Lookup the given spa_t, incrementing the inject count in the process, * preventing it from being exported or destroyed. */ spa_t * spa_inject_addref(char *name) { spa_t *spa; mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); return (NULL); } spa->spa_inject_ref++; mutex_exit(&spa_namespace_lock); return (spa); } void spa_inject_delref(spa_t *spa) { mutex_enter(&spa_namespace_lock); spa->spa_inject_ref--; mutex_exit(&spa_namespace_lock); } /* * Add spares device information to the nvlist. 
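 *
 * The result is a ZPOOL_CONFIG_SPARES array attached to the config's
 * vdev tree, roughly (sketch):
 *
 *	ZPOOL_CONFIG_VDEV_TREE:
 *		...
 *		ZPOOL_CONFIG_SPARES = [
 *			{ ZPOOL_CONFIG_GUID = ...,
 *			  ZPOOL_CONFIG_VDEV_STATS = ... },
 *			... ]
 *
 * with the stats of any spare that is currently in use rewritten to
 * VDEV_STATE_CANT_OPEN / VDEV_AUX_SPARED, as done below.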
*/ static void spa_add_spares(spa_t *spa, nvlist_t *config) { nvlist_t **spares; uint_t i, nspares; nvlist_t *nvroot; uint64_t guid; vdev_stat_t *vs; uint_t vsc; uint64_t pool; ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); if (spa->spa_spares.sav_count == 0) return; VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); if (nspares != 0) { VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); /* * Go through and find any spares which have since been * repurposed as an active spare. If this is the case, update * their status appropriately. */ for (i = 0; i < nspares; i++) { VERIFY(nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID, &guid) == 0); if (spa_spare_exists(guid, &pool, NULL) && pool != 0ULL) { VERIFY(nvlist_lookup_uint64_array( spares[i], ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); vs->vs_state = VDEV_STATE_CANT_OPEN; vs->vs_aux = VDEV_AUX_SPARED; } } } } /* * Add l2cache device information to the nvlist, including vdev stats. */ static void spa_add_l2cache(spa_t *spa, nvlist_t *config) { nvlist_t **l2cache; uint_t i, j, nl2cache; nvlist_t *nvroot; uint64_t guid; vdev_t *vd; vdev_stat_t *vs; uint_t vsc; ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); if (spa->spa_l2cache.sav_count == 0) return; VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); if (nl2cache != 0) { VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); /* * Update level 2 cache device stats. */ for (i = 0; i < nl2cache; i++) { VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, &guid) == 0); vd = NULL; for (j = 0; j < spa->spa_l2cache.sav_count; j++) { if (guid == spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { vd = spa->spa_l2cache.sav_vdevs[j]; break; } } ASSERT(vd != NULL); VERIFY(nvlist_lookup_uint64_array(l2cache[i], ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); vdev_get_stats(vd, vs); } } } static void spa_add_feature_stats(spa_t *spa, nvlist_t *config) { nvlist_t *features; zap_cursor_t zc; zap_attribute_t za; ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0); /* We may be unable to read features if pool is suspended. 
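 *
 * Consumers find the result under ZPOOL_CONFIG_FEATURE_STATS in the
 * returned config; an illustrative sketch (the feature name here is
 * hypothetical):
 *
 *	nvlist_t *feat;
 *	uint64_t refcount;
 *
 *	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
 *	    &feat) == 0 &&
 *	    nvlist_lookup_uint64(feat, "com.example:feature",
 *	    &refcount) == 0)
 *		... refcount is the on-disk refcount for that feature ...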
*/ if (spa_suspended(spa)) goto out; if (spa->spa_feat_for_read_obj != 0) { for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_feat_for_read_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { ASSERT(za.za_integer_length == sizeof (uint64_t) && za.za_num_integers == 1); VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, za.za_first_integer)); } zap_cursor_fini(&zc); } if (spa->spa_feat_for_write_obj != 0) { for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_feat_for_write_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { ASSERT(za.za_integer_length == sizeof (uint64_t) && za.za_num_integers == 1); VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, za.za_first_integer)); } zap_cursor_fini(&zc); } out: VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, features) == 0); nvlist_free(features); } int spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) { int error; spa_t *spa; *config = NULL; error = spa_open_common(name, &spa, FTAG, NULL, config); if (spa != NULL) { /* * This still leaves a window of inconsistency where the spares * or l2cache devices could change and the config would be * self-inconsistent. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); if (*config != NULL) { uint64_t loadtimes[2]; loadtimes[0] = spa->spa_loaded_ts.tv_sec; loadtimes[1] = spa->spa_loaded_ts.tv_nsec; VERIFY(nvlist_add_uint64_array(*config, ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, spa_get_errlog_size(spa)) == 0); if (spa_suspended(spa)) VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_SUSPENDED, spa->spa_failmode) == 0); spa_add_spares(spa, *config); spa_add_l2cache(spa, *config); spa_add_feature_stats(spa, *config); } } /* * We want to get the alternate root even for faulted pools, so we cheat * and call spa_lookup() directly. */ if (altroot) { if (spa == NULL) { mutex_enter(&spa_namespace_lock); spa = spa_lookup(name); if (spa) spa_altroot(spa, altroot, buflen); else altroot[0] = '\0'; spa = NULL; mutex_exit(&spa_namespace_lock); } else { spa_altroot(spa, altroot, buflen); } } if (spa != NULL) { spa_config_exit(spa, SCL_CONFIG, FTAG); spa_close(spa, FTAG); } return (error); } /* * Validate that the auxiliary device array is well formed. We must have an * array of nvlists, each which describes a valid leaf vdev. If this is an * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be * specified, as long as they are well-formed. */ static int spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, spa_aux_vdev_t *sav, const char *config, uint64_t version, vdev_labeltype_t label) { nvlist_t **dev; uint_t i, ndev; vdev_t *vd; int error; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); /* * It's acceptable to have no devs specified. */ if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) return (0); if (ndev == 0) return (SET_ERROR(EINVAL)); /* * Make sure the pool is formatted with a version that supports this * device type. */ if (spa_version(spa) < version) return (SET_ERROR(ENOTSUP)); /* * Set the pending device list so we correctly handle device in-use * checking. */ sav->sav_pending = dev; sav->sav_npending = ndev; for (i = 0; i < ndev; i++) { if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, mode)) != 0) goto out; if (!vd->vdev_ops->vdev_op_leaf) { vdev_free(vd); error = SET_ERROR(EINVAL); goto out; } /* * The L2ARC currently only supports disk devices in * kernel context. 
		 * For user-level testing, we allow it.
		 */
#ifdef _KERNEL
		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
			error = SET_ERROR(ENOTBLK);
			vdev_free(vd);
			goto out;
		}
#endif
		vd->vdev_top = vd;

		if ((error = vdev_open(vd)) == 0 &&
		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
			    vd->vdev_guid) == 0);
		}

		vdev_free(vd);

		if (error &&
		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
			goto out;
		else
			error = 0;
	}

out:
	sav->sav_pending = NULL;
	sav->sav_npending = 0;
	return (error);
}

static int
spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
{
	int error;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
	    VDEV_LABEL_SPARE)) != 0) {
		return (error);
	}

	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
	    VDEV_LABEL_L2CACHE));
}

static void
spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
    const char *config)
{
	int i;

	if (sav->sav_config != NULL) {
		nvlist_t **olddevs;
		uint_t oldndevs;
		nvlist_t **newdevs;

		/*
		 * Generate new dev list by concatenating with the
		 * current dev list.
		 */
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
		    &olddevs, &oldndevs) == 0);
		newdevs = kmem_alloc(sizeof (void *) *
		    (ndevs + oldndevs), KM_SLEEP);
		for (i = 0; i < oldndevs; i++)
			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
			    KM_SLEEP) == 0);
		for (i = 0; i < ndevs; i++)
			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
			    KM_SLEEP) == 0);

		VERIFY(nvlist_remove(sav->sav_config, config,
		    DATA_TYPE_NVLIST_ARRAY) == 0);

		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
		    config, newdevs, ndevs + oldndevs) == 0);
		for (i = 0; i < oldndevs + ndevs; i++)
			nvlist_free(newdevs[i]);
		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
	} else {
		/*
		 * Generate a new dev list.
		 */
		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
		    devs, ndevs) == 0);
	}
}

/*
 * Stop and drop level 2 ARC devices.
 */
void
spa_l2cache_drop(spa_t *spa)
{
	vdev_t *vd;
	int i;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	for (i = 0; i < sav->sav_count; i++) {
		uint64_t pool;

		vd = sav->sav_vdevs[i];
		ASSERT(vd != NULL);

		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
		    pool != 0ULL && l2arc_vdev_present(vd))
			l2arc_remove_vdev(vd);
	}
}

/*
 * Pool Creation
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version, obj;
	boolean_t has_features;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (SET_ERROR(EEXIST));
	}

	/*
	 * Allocate a new spa_t structure.
*/ (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); spa = spa_add(pool, NULL, altroot); spa_activate(spa, spa_mode_global); if (props && (error = spa_prop_validate(spa, props))) { spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } has_features = B_FALSE; for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL; elem = nvlist_next_nvpair(props, elem)) { if (zpool_prop_feature(nvpair_name(elem))) has_features = B_TRUE; } if (has_features || nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { version = SPA_VERSION; } ASSERT(SPA_VERSION_IS_SUPPORTED(version)); spa->spa_first_txg = txg; spa->spa_uberblock.ub_txg = txg - 1; spa->spa_uberblock.ub_version = version; spa->spa_ubsync = spa->spa_uberblock; spa->spa_load_state = SPA_LOAD_CREATE; /* * Create "The Godfather" zio to hold all async IOs */ spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); for (int i = 0; i < max_ncpus; i++) { spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); } /* * Create the root vdev. */ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); ASSERT(error != 0 || rvd != NULL); ASSERT(error != 0 || spa->spa_root_vdev == rvd); if (error == 0 && !zfs_allocatable_devs(nvroot)) error = SET_ERROR(EINVAL); if (error == 0 && (error = vdev_create(rvd, txg, B_FALSE)) == 0 && (error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) == 0) { for (int c = 0; c < rvd->vdev_children; c++) { vdev_ashift_optimize(rvd->vdev_child[c]); vdev_metaslab_set_size(rvd->vdev_child[c]); vdev_expand(rvd->vdev_child[c], txg); } } spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } /* * Get the list of spares, if specified. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_spares.sav_sync = B_TRUE; } /* * Get the list of level 2 cache devices, if specified. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_l2cache.sav_sync = B_TRUE; } spa->spa_is_initializing = B_TRUE; spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); spa->spa_meta_objset = dp->dp_meta_objset; spa->spa_is_initializing = B_FALSE; /* * Create DDTs (dedup tables). */ ddt_create(spa); spa_update_dspace(spa); tx = dmu_tx_create_assigned(dp, txg); /* * Create the pool config object. 
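 *
 * The config object is a packed nvlist stored in the MOS and indexed
 * from the pool directory; at load time it is read back using the
 * same pattern seen in spa_load_impl() (sketch):
 *
 *	if (spa_dir_prop(spa, DMU_POOL_CONFIG,
 *	    &spa->spa_config_object) == 0)
 *		(void) load_nvlist(spa, spa->spa_config_object, &nvconfig);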
*/ spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { cmn_err(CE_PANIC, "failed to add pool config"); } if (spa_version(spa) >= SPA_VERSION_FEATURES) spa_feature_create_zap_objects(spa, tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, sizeof (uint64_t), 1, &version, tx) != 0) { cmn_err(CE_PANIC, "failed to add pool version"); } /* Newly created pools with the right version are always deflated. */ if (version >= SPA_VERSION_RAIDZ_DEFLATE) { spa->spa_deflate = TRUE; if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { cmn_err(CE_PANIC, "failed to add deflate"); } } /* * Create the deferred-free bpobj. Turn off compression * because sync-to-convergence takes longer if the blocksize * keeps changing. */ obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); dmu_object_set_compress(spa->spa_meta_objset, obj, ZIO_COMPRESS_OFF, tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, sizeof (uint64_t), 1, &obj, tx) != 0) { cmn_err(CE_PANIC, "failed to add bpobj"); } VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj)); /* * Create the pool's history object. */ if (version >= SPA_VERSION_ZPOOL_HISTORY) spa_history_create_obj(spa, tx); /* * Generate some random noise for salted checksums to operate on. */ (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, sizeof (spa->spa_cksum_salt.zcs_bytes)); /* * Set pool properties. */ spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); if (props != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_sync_props(props, tx); } dmu_tx_commit(tx); spa->spa_sync_on = B_TRUE; txg_sync_start(spa->spa_dsl_pool); /* * We explicitly wait for the first transaction to complete so that our * bean counters are appropriately updated. */ txg_wait_synced(spa->spa_dsl_pool, txg); spa_config_sync(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, ESC_ZFS_POOL_CREATE); spa_history_log_version(spa, "create"); /* * Don't count references from objsets that are already closed * and are making their way through the eviction process. */ spa_evicting_os_wait(spa); spa->spa_minref = refcount_count(&spa->spa_refcount); spa->spa_load_state = SPA_LOAD_NONE; mutex_exit(&spa_namespace_lock); return (0); } #ifdef _KERNEL #ifdef illumos /* * Get the root pool information from the root disk, then import the root pool * during the system boot up time. */ extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); static nvlist_t * spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) { nvlist_t *config; nvlist_t *nvtop, *nvroot; uint64_t pgid; if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) return (NULL); /* * Add this top-level vdev to the child array. 
*/ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); /* * Put this pool's top-level vdevs into a root vdev. */ VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &nvtop, 1) == 0); /* * Replace the existing vdev_tree with the new root vdev in * this pool's configuration (remove the old, add the new). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); return (config); } /* * Walk the vdev tree and see if we can find a device with "better" * configuration. A configuration is "better" if the label on that * device has a more recent txg. */ static void spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) { for (int c = 0; c < vd->vdev_children; c++) spa_alt_rootvdev(vd->vdev_child[c], avd, txg); if (vd->vdev_ops->vdev_op_leaf) { nvlist_t *label; uint64_t label_txg; if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, &label) != 0) return; VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, &label_txg) == 0); /* * Do we have a better boot device? */ if (label_txg > *txg) { *txg = label_txg; *avd = vd; } nvlist_free(label); } } /* * Import a root pool. * * For x86. devpath_list will consist of devid and/or physpath name of * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). * The GRUB "findroot" command will return the vdev we should boot. * * For Sparc, devpath_list consists the physpath name of the booting device * no matter the rootpool is a single device pool or a mirrored pool. * e.g. * "/pci@1f,0/ide@d/disk@0,0:a" */ int spa_import_rootpool(char *devpath, char *devid) { spa_t *spa; vdev_t *rvd, *bvd, *avd = NULL; nvlist_t *config, *nvtop; uint64_t guid, txg; char *pname; int error; /* * Read the label from the boot device and generate a configuration. */ config = spa_generate_rootconf(devpath, devid, &guid); #if defined(_OBP) && defined(_KERNEL) if (config == NULL) { if (strstr(devpath, "/iscsi/ssd") != NULL) { /* iscsi boot */ get_iscsi_bootpath_phy(devpath); config = spa_generate_rootconf(devpath, devid, &guid); } } #endif if (config == NULL) { cmn_err(CE_NOTE, "Cannot read the pool label from '%s'", devpath); return (SET_ERROR(EIO)); } VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(pname)) != NULL) { /* * Remove the existing root pool from the namespace so that we * can replace it with the correct config we just read in. */ spa_remove(spa); } spa = spa_add(pname, config, NULL); spa->spa_is_root = B_TRUE; spa->spa_import_flags = ZFS_IMPORT_VERBATIM; /* * Build up a vdev tree based on the boot device's label config. 
*/ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", pname); return (error); } /* * Get the boot vdev. */ if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", (u_longlong_t)guid); error = SET_ERROR(ENOENT); goto out; } /* * Determine if there is a better boot device. */ avd = bvd; spa_alt_rootvdev(rvd, &avd, &txg); if (avd != bvd) { cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " "try booting from '%s'", avd->vdev_path); error = SET_ERROR(EINVAL); goto out; } /* * If the boot device is part of a spare vdev then ensure that * we're booting off the active spare. */ if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && !bvd->vdev_isspare) { cmn_err(CE_NOTE, "The boot device is currently spared. Please " "try booting from '%s'", bvd->vdev_parent-> vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); error = SET_ERROR(EINVAL); goto out; } error = 0; out: spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); spa_config_exit(spa, SCL_ALL, FTAG); mutex_exit(&spa_namespace_lock); nvlist_free(config); return (error); } #else /* !illumos */ extern int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs, uint64_t *count); static nvlist_t * spa_generate_rootconf(const char *name) { nvlist_t **configs, **tops; nvlist_t *config; nvlist_t *best_cfg, *nvtop, *nvroot; uint64_t *holes; uint64_t best_txg; uint64_t nchildren; uint64_t pgid; uint64_t count; uint64_t i; uint_t nholes; if (vdev_geom_read_pool_label(name, &configs, &count) != 0) return (NULL); ASSERT3U(count, !=, 0); best_txg = 0; for (i = 0; i < count; i++) { uint64_t txg; VERIFY(nvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG, &txg) == 0); if (txg > best_txg) { best_txg = txg; best_cfg = configs[i]; } } nchildren = 1; nvlist_lookup_uint64(best_cfg, ZPOOL_CONFIG_VDEV_CHILDREN, &nchildren); holes = NULL; nvlist_lookup_uint64_array(best_cfg, ZPOOL_CONFIG_HOLE_ARRAY, &holes, &nholes); tops = kmem_zalloc(nchildren * sizeof(void *), KM_SLEEP); for (i = 0; i < nchildren; i++) { if (i >= count) break; if (configs[i] == NULL) continue; VERIFY(nvlist_lookup_nvlist(configs[i], ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); nvlist_dup(nvtop, &tops[i], KM_SLEEP); } for (i = 0; holes != NULL && i < nholes; i++) { if (i >= nchildren) continue; if (tops[holes[i]] != NULL) continue; nvlist_alloc(&tops[holes[i]], NV_UNIQUE_NAME, KM_SLEEP); VERIFY(nvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID, holes[i]) == 0); VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID, 0) == 0); } for (i = 0; i < nchildren; i++) { if (tops[i] != NULL) continue; nvlist_alloc(&tops[i], NV_UNIQUE_NAME, KM_SLEEP); VERIFY(nvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE, VDEV_TYPE_MISSING) == 0); VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID, i) == 0); VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID, 0) == 0); } /* * Create pool config based on the best vdev config. */ nvlist_dup(best_cfg, &config, KM_SLEEP); /* * Put this pool's top-level vdevs into a root vdev. 
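 *
 * That is, the label's vdev_tree (a single top-level vdev) is wrapped
 * under a synthesized "root" node, with holes and absent children
 * padded out; roughly (sketch):
 *
 *	before:	vdev_tree = { type = "disk" | "mirror" | ..., guid, ... }
 *	after:	vdev_tree = { type = "root", id = 0, guid = <pool guid>,
 *			children = [ tops[0] ... tops[nchildren - 1] ] }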
*/ VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) == 0); VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, tops, nchildren) == 0); /* * Replace the existing vdev_tree with the new root vdev in * this pool's configuration (remove the old, add the new). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); /* * Drop vdev config elements that should not be present at pool level. */ nvlist_remove(config, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64); nvlist_remove(config, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64); for (i = 0; i < count; i++) nvlist_free(configs[i]); kmem_free(configs, count * sizeof(void *)); for (i = 0; i < nchildren; i++) nvlist_free(tops[i]); kmem_free(tops, nchildren * sizeof(void *)); nvlist_free(nvroot); return (config); } int spa_import_rootpool(const char *name) { spa_t *spa; vdev_t *rvd, *bvd, *avd = NULL; nvlist_t *config, *nvtop; uint64_t txg; char *pname; int error; /* * Read the label from the boot device and generate a configuration. */ config = spa_generate_rootconf(name); mutex_enter(&spa_namespace_lock); if (config != NULL) { VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0 && strcmp(name, pname) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); if ((spa = spa_lookup(pname)) != NULL) { /* * Remove the existing root pool from the namespace so * that we can replace it with the correct config * we just read in. */ spa_remove(spa); } spa = spa_add(pname, config, NULL); /* * Set spa_ubsync.ub_version as it can be used in vdev_alloc() * via spa_version(). */ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &spa->spa_ubsync.ub_version) != 0) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; } else if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Cannot find the pool label for '%s'", name); return (EIO); } else { VERIFY(nvlist_dup(spa->spa_config, &config, KM_SLEEP) == 0); } spa->spa_is_root = B_TRUE; spa->spa_import_flags = ZFS_IMPORT_VERBATIM; /* * Build up a vdev tree based on the boot device's label config. */ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", pname); return (error); } spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); spa_config_exit(spa, SCL_ALL, FTAG); mutex_exit(&spa_namespace_lock); nvlist_free(config); return (0); } #endif /* illumos */ #endif /* _KERNEL */ /* * Import a non-root pool into the system. */ int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) { spa_t *spa; char *altroot = NULL; spa_load_state_t state = SPA_LOAD_IMPORT; zpool_rewind_policy_t policy; uint64_t mode = spa_mode_global; uint64_t readonly = B_FALSE; int error; nvlist_t *nvroot; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; /* * If a pool with this name exists, return failure. 
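 *
 * A minimal caller looks like the following (sketch; 'config' as
 * produced by a device scan or tryimport, pool name hypothetical):
 *
 *	error = spa_import("tank", config, NULL, ZFS_IMPORT_NORMAL);
 *
 * Passing ZFS_IMPORT_VERBATIM in 'flags' instead takes the
 * short-circuit path below, inserting the pool into the namespace
 * without loading it.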
*/ mutex_enter(&spa_namespace_lock); if (spa_lookup(pool) != NULL) { mutex_exit(&spa_namespace_lock); return (SET_ERROR(EEXIST)); } /* * Create and initialize the spa structure. */ (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); if (readonly) mode = FREAD; spa = spa_add(pool, config, altroot); spa->spa_import_flags = flags; /* * Verbatim import - Take a pool and insert it into the namespace * as if it had been loaded at boot. */ if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { if (props != NULL) spa_configfile_set(spa, props, B_FALSE); spa_config_sync(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, ESC_ZFS_POOL_IMPORT); mutex_exit(&spa_namespace_lock); return (0); } spa_activate(spa, mode); /* * Don't start async tasks until we know everything is healthy. */ spa_async_suspend(spa); zpool_get_rewind_policy(config, &policy); if (policy.zrp_request & ZPOOL_DO_REWIND) state = SPA_LOAD_RECOVER; /* * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig * because the user-supplied config is actually the one to trust when * doing an import. */ if (state != SPA_LOAD_RECOVER) spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg, policy.zrp_request); /* * Propagate anything learned while loading the pool and pass it * back to caller (i.e. rewind info, missing devices, etc). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * Toss any existing sparelist, as it doesn't have any validity * anymore, and conflicts with spa_has_spare(). */ if (spa->spa_spares.sav_config) { nvlist_free(spa->spa_spares.sav_config); spa->spa_spares.sav_config = NULL; spa_load_spares(spa); } if (spa->spa_l2cache.sav_config) { nvlist_free(spa->spa_l2cache.sav_config); spa->spa_l2cache.sav_config = NULL; spa_load_l2cache(spa); } VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (error == 0) error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE); if (error == 0) error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_L2CACHE); spa_config_exit(spa, SCL_ALL, FTAG); if (props != NULL) spa_configfile_set(spa, props, B_FALSE); if (error != 0 || (props && spa_writeable(spa) && (error = spa_prop_set(spa, props)))) { spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } spa_async_resume(spa); /* * Override any spares and level 2 cache devices as specified by * the user, as these may have correct device names/devids, etc. 
*/ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { if (spa->spa_spares.sav_config) VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); else VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_spares.sav_sync = B_TRUE; } if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { if (spa->spa_l2cache.sav_config) VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); else VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_l2cache.sav_sync = B_TRUE; } /* * Check for any removed devices. */ if (spa->spa_autoreplace) { spa_aux_check_removed(&spa->spa_spares); spa_aux_check_removed(&spa->spa_l2cache); } if (spa_writeable(spa)) { /* * Update the config cache to include the newly-imported pool. */ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); } /* * It's possible that the pool was expanded while it was exported. * We kick off an async task to handle this for us. */ spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); spa_history_log_version(spa, "import"); spa_event_notify(spa, NULL, ESC_ZFS_POOL_IMPORT); mutex_exit(&spa_namespace_lock); #ifdef __FreeBSD__ #ifdef _KERNEL zvol_create_minors(pool); #endif #endif return (0); } nvlist_t * spa_tryimport(nvlist_t *tryconfig) { nvlist_t *config = NULL; char *poolname; spa_t *spa; uint64_t state; int error; if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) return (NULL); if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) return (NULL); /* * Create and initialize the spa structure. */ mutex_enter(&spa_namespace_lock); spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); spa_activate(spa, FREAD); /* * Pass off the heavy lifting to spa_load(). * Pass TRUE for mosconfig because the user-supplied config * is actually the one to trust when doing an import. */ error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE); /* * If 'tryconfig' was at least parsable, return the current config. */ if (spa->spa_root_vdev != NULL) { config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, poolname) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, spa->spa_uberblock.ub_timestamp) == 0); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); /* * If the bootfs property exists on this pool then we * copy it out so that external consumers can tell which * pools are bootable. */ if ((!error || error == EEXIST) && spa->spa_bootfs) { char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); /* * We have to play games with the name since the * pool was opened as TRYIMPORT_NAME. 
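 *
 * For instance (names hypothetical): if the bootfs object resolves to
 * "$import/ROOT/default" and the pool's real name is "rpool", the
 * rewrite below yields "rpool/ROOT/default".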
*/ if (dsl_dsobj_to_dsname(spa_name(spa), spa->spa_bootfs, tmpname) == 0) { char *cp; char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); cp = strchr(tmpname, '/'); if (cp == NULL) { (void) strlcpy(dsname, tmpname, MAXPATHLEN); } else { (void) snprintf(dsname, MAXPATHLEN, "%s/%s", poolname, ++cp); } VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_BOOTFS, dsname) == 0); kmem_free(dsname, MAXPATHLEN); } kmem_free(tmpname, MAXPATHLEN); } /* * Add the list of hot spares and level 2 cache devices. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa_add_spares(spa, config); spa_add_l2cache(spa, config); spa_config_exit(spa, SCL_CONFIG, FTAG); } spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (config); } /* * Pool export/destroy * * The act of destroying or exporting a pool is very simple. We make sure there * is no more pending I/O and any references to the pool are gone. Then, we * update the pool state and sync all the labels to disk, removing the * configuration from the cache afterwards. If the 'hardforce' flag is set, then * we don't sync the labels or remove the configuration cache. */ static int spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, boolean_t force, boolean_t hardforce) { spa_t *spa; if (oldconfig) *oldconfig = NULL; if (!(spa_mode_global & FWRITE)) return (SET_ERROR(EROFS)); mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(pool)) == NULL) { mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } /* * Put a hold on the pool, drop the namespace lock, stop async tasks, * reacquire the namespace lock, and see if we can export. */ spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); spa_async_suspend(spa); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); /* * The pool will be in core if it's openable, * in which case we can modify its state. */ if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { /* * Objsets may be open only because they're dirty, so we * have to force it to sync before checking spa_refcnt. */ txg_wait_synced(spa->spa_dsl_pool, 0); spa_evicting_os_wait(spa); /* * A pool cannot be exported or destroyed if there are active * references. If we are resetting a pool, allow references by * fault injection handlers. */ if (!spa_refcount_zero(spa) || (spa->spa_inject_ref != 0 && new_state != POOL_STATE_UNINITIALIZED)) { spa_async_resume(spa); mutex_exit(&spa_namespace_lock); return (SET_ERROR(EBUSY)); } /* * A pool cannot be exported if it has an active shared spare. * This is to prevent other pools stealing the active spare * from an exported pool. At user's own will, such pool can * be forcedly exported. */ if (!force && new_state == POOL_STATE_EXPORTED && spa_has_active_shared_spare(spa)) { spa_async_resume(spa); mutex_exit(&spa_namespace_lock); return (SET_ERROR(EXDEV)); } /* * We want this to be reflected on every label, * so mark them all dirty. spa_unload() will do the * final sync that pushes these changes out. 
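 *
 * For reference, the entry points below map onto spa_export_common()
 * as follows:
 *
 *	spa_destroy()	new_state = POOL_STATE_DESTROYED
 *	spa_export()	new_state = POOL_STATE_EXPORTED
 *	spa_reset()	new_state = POOL_STATE_UNINITIALIZED
 *			(unload/reset only; the pool stays in the
 *			namespace)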
*/ if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa->spa_state = new_state; spa->spa_final_txg = spa_last_synced_txg(spa) + TXG_DEFER_SIZE + 1; vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); } } spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { spa_unload(spa); spa_deactivate(spa); } if (oldconfig && spa->spa_config) VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); if (new_state != POOL_STATE_UNINITIALIZED) { if (!hardforce) spa_config_sync(spa, B_TRUE, B_TRUE); spa_remove(spa); } mutex_exit(&spa_namespace_lock); return (0); } /* * Destroy a storage pool. */ int spa_destroy(char *pool) { return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, B_FALSE, B_FALSE)); } /* * Export a storage pool. */ int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, boolean_t hardforce) { return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, force, hardforce)); } /* * Similar to spa_export(), this unloads the spa_t without actually removing it * from the namespace in any way. */ int spa_reset(char *pool) { return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, B_FALSE, B_FALSE)); } /* * ========================================================================== * Device manipulation * ========================================================================== */ /* * Add a device to a storage pool. */ int spa_vdev_add(spa_t *spa, nvlist_t *nvroot) { uint64_t txg, id; int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *tvd; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, VDEV_ALLOC_ADD)) != 0) return (spa_vdev_exit(spa, NULL, txg, error)); spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) != 0) nspares = 0; if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) != 0) nl2cache = 0; if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) return (spa_vdev_exit(spa, vd, txg, EINVAL)); if (vd->vdev_children != 0 && (error = vdev_create(vd, txg, B_FALSE)) != 0) return (spa_vdev_exit(spa, vd, txg, error)); /* * We must validate the spares and l2cache devices after checking the * children. Otherwise, vdev_inuse() will blindly overwrite the spare. */ if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) return (spa_vdev_exit(spa, vd, txg, error)); /* * Transfer each new top-level vdev from vd to rvd. */ for (int c = 0; c < vd->vdev_children; c++) { /* * Set the vdev id to the first hole, if one exists. */ for (id = 0; id < rvd->vdev_children; id++) { if (rvd->vdev_child[id]->vdev_ishole) { vdev_free(rvd->vdev_child[id]); break; } } tvd = vd->vdev_child[c]; vdev_remove_child(vd, tvd); tvd->vdev_id = id; vdev_add_child(rvd, tvd); vdev_config_dirty(tvd); } if (nspares != 0) { spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, ZPOOL_CONFIG_SPARES); spa_load_spares(spa); spa->spa_spares.sav_sync = B_TRUE; } if (nl2cache != 0) { spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, ZPOOL_CONFIG_L2CACHE); spa_load_l2cache(spa); spa->spa_l2cache.sav_sync = B_TRUE; } /* * We have to be careful when adding new vdevs to an existing pool. 
* If other threads start allocating from these vdevs before we * sync the config cache, and we lose power, then upon reboot we may * fail to open the pool because there are DVAs that the config cache * can't translate. Therefore, we first add the vdevs without * initializing metaslabs; sync the config cache (via spa_vdev_exit()); * and then let spa_config_update() initialize the new metaslabs. * * spa_load() checks for added-but-not-initialized vdevs, so that * if we lose power at any point in this sequence, the remaining * steps will be completed the next time we load the pool. */ (void) spa_vdev_exit(spa, vd, txg, 0); mutex_enter(&spa_namespace_lock); spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); spa_event_notify(spa, NULL, ESC_ZFS_VDEV_ADD); mutex_exit(&spa_namespace_lock); return (0); } /* * Attach a device to a mirror. The arguments are the path to any device * in the mirror, and the nvroot for the new device. If the path specifies * a device that is not mirrored, we automatically insert the mirror vdev. * * If 'replacing' is specified, the new device is intended to replace the * existing device; in this case the two devices are made into their own * mirror using the 'replacing' vdev, which is functionally identical to * the mirror vdev (it actually reuses all the same ops) but has a few * extra rules: you can't attach to it after it's been created, and upon * completion of resilvering, the first disk (the one being replaced) * is automatically detached. */ int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) { uint64_t txg, dtl_max_txg; vdev_t *rvd = spa->spa_root_vdev; vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; vdev_ops_t *pvops; char *oldvdpath, *newvdpath; int newvd_isspare; int error; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); if (oldvd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); if (!oldvd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); pvd = oldvd->vdev_parent; if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, VDEV_ALLOC_ATTACH)) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); if (newrootvd->vdev_children != 1) return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); newvd = newrootvd->vdev_child[0]; if (!newvd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); if ((error = vdev_create(newrootvd, txg, replacing)) != 0) return (spa_vdev_exit(spa, newrootvd, txg, error)); /* * Spares can't replace logs */ if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); if (!replacing) { /* * For attach, the only allowable parent is a mirror or the root * vdev. */ if (pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_root_ops) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); pvops = &vdev_mirror_ops; } else { /* * Active hot spares can only be replaced by inactive hot * spares. */ if (pvd->vdev_ops == &vdev_spare_ops && oldvd->vdev_isspare && !spa_has_spare(spa, newvd->vdev_guid)) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); /* * If the source is a hot spare, and the parent isn't already a * spare, then we want to create a new hot spare. Otherwise, we * want to create a replacing vdev. The user is not allowed to * attach to a spared vdev child unless the 'isspare' state is * the same (spare replaces spare, non-spare replaces * non-spare). 
*/ if (pvd->vdev_ops == &vdev_replacing_ops && spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } else if (pvd->vdev_ops == &vdev_spare_ops && newvd->vdev_isspare != oldvd->vdev_isspare) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } if (newvd->vdev_isspare) pvops = &vdev_spare_ops; else pvops = &vdev_replacing_ops; } /* * Make sure the new device is big enough. */ if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); /* * The new device cannot have a higher alignment requirement * than the top-level vdev. */ if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); /* * If this is an in-place replacement, update oldvd's path and devid * to make it distinguishable from newvd, and unopenable from now on. */ if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { spa_strfree(oldvd->vdev_path); oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, KM_SLEEP); (void) sprintf(oldvd->vdev_path, "%s/%s", newvd->vdev_path, "old"); if (oldvd->vdev_devid != NULL) { spa_strfree(oldvd->vdev_devid); oldvd->vdev_devid = NULL; } } /* mark the device being resilvered */ newvd->vdev_resilver_txg = txg; /* * If the parent is not a mirror, or if we're replacing, insert the new * mirror/replacing/spare vdev above oldvd. */ if (pvd->vdev_ops != pvops) pvd = vdev_add_parent(oldvd, pvops); ASSERT(pvd->vdev_top->vdev_parent == rvd); ASSERT(pvd->vdev_ops == pvops); ASSERT(oldvd->vdev_parent == pvd); /* * Extract the new device from its root and add it to pvd. */ vdev_remove_child(newrootvd, newvd); newvd->vdev_id = pvd->vdev_children; newvd->vdev_crtxg = oldvd->vdev_crtxg; vdev_add_child(pvd, newvd); tvd = newvd->vdev_top; ASSERT(pvd->vdev_top == tvd); ASSERT(tvd->vdev_parent == rvd); vdev_config_dirty(tvd); /* * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account * for any dmu_sync-ed blocks. It will propagate upward when * spa_vdev_exit() calls vdev_dtl_reassess(). */ dtl_max_txg = txg + TXG_CONCURRENT_STATES; vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, dtl_max_txg - TXG_INITIAL); if (newvd->vdev_isspare) { spa_spare_activate(newvd); spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); } oldvdpath = spa_strdup(oldvd->vdev_path); newvdpath = spa_strdup(newvd->vdev_path); newvd_isspare = newvd->vdev_isspare; /* * Mark newvd's DTL dirty in this txg. */ vdev_dirty(tvd, VDD_DTL, newvd, txg); /* * Schedule the resilver to restart in the future. We do this to * ensure that dmu_sync-ed blocks have been stitched into the * respective datasets. */ dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); if (spa->spa_bootfs) spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH); spa_event_notify(spa, newvd, ESC_ZFS_VDEV_ATTACH); /* * Commit the config */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); spa_history_log_internal(spa, "vdev attach", NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? "spare in" : replacing ? "replace" : "attach", newvdpath, replacing ? "for" : "to", oldvdpath); spa_strfree(oldvdpath); spa_strfree(newvdpath); return (0); } /* * Detach a device from a mirror or replacing vdev. * * If 'replace_done' is specified, only detach if the parent * is a replacing vdev. 
*/ int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) { uint64_t txg; int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *pvd, *cvd, *tvd; boolean_t unspare = B_FALSE; uint64_t unspare_guid = 0; char *vdpath; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); vd = spa_lookup_by_guid(spa, guid, B_FALSE); if (vd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); if (!vd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); pvd = vd->vdev_parent; /* * If the parent/child relationship is not as expected, don't do it. * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing * vdev that's replacing B with C. The user's intent in replacing * is to go from M(A,B) to M(A,C). If the user decides to cancel * the replace by detaching C, the expected behavior is to end up * M(A,B). But suppose that right after deciding to detach C, * the replacement of B completes. We would have M(A,C), and then * ask to detach C, which would leave us with just A -- not what * the user wanted. To prevent this, we make sure that the * parent/child relationship hasn't changed -- in this example, * that C's parent is still the replacing vdev R. */ if (pvd->vdev_guid != pguid && pguid != 0) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); /* * Only 'replacing' or 'spare' vdevs can be replaced. */ if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && pvd->vdev_ops != &vdev_spare_ops) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); ASSERT(pvd->vdev_ops != &vdev_spare_ops || spa_version(spa) >= SPA_VERSION_SPARES); /* * Only mirror, replacing, and spare vdevs support detach. */ if (pvd->vdev_ops != &vdev_replacing_ops && pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_spare_ops) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); /* * If this device has the only valid copy of some data, * we cannot safely detach it. */ if (vdev_dtl_required(vd)) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); ASSERT(pvd->vdev_children >= 2); /* * If we are detaching the second disk from a replacing vdev, then * check to see if we changed the original vdev's path to have "/old" * at the end in spa_vdev_attach(). If so, undo that change now. */ if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && vd->vdev_path != NULL) { size_t len = strlen(vd->vdev_path); for (int c = 0; c < pvd->vdev_children; c++) { cvd = pvd->vdev_child[c]; if (cvd == vd || cvd->vdev_path == NULL) continue; if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && strcmp(cvd->vdev_path + len, "/old") == 0) { spa_strfree(cvd->vdev_path); cvd->vdev_path = spa_strdup(vd->vdev_path); break; } } } /* * If we are detaching the original disk from a spare, then it implies * that the spare should become a real disk, and be removed from the * active spare list for the pool. */ if (pvd->vdev_ops == &vdev_spare_ops && vd->vdev_id == 0 && pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) unspare = B_TRUE; /* * Erase the disk labels so the disk can be used for other things. * This must be done after all other error cases are handled, * but before we disembowel vd (so we can still do I/O to it). * But if we can't do it, don't treat the error as fatal -- * it may be that the unwritability of the disk is the reason * it's being detached! */ error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); /* * Remove vd from its parent and compact the parent's children. */ vdev_remove_child(pvd, vd); vdev_compact_children(pvd); /* * Remember one of the remaining children so we can get tvd below. 
*/ cvd = pvd->vdev_child[pvd->vdev_children - 1]; /* * If we need to remove the remaining child from the list of hot spares, * do it now, marking the vdev as no longer a spare in the process. * We must do this before vdev_remove_parent(), because that can * change the GUID if it creates a new toplevel GUID. For a similar * reason, we must remove the spare now, in the same txg as the detach; * otherwise someone could attach a new sibling, change the GUID, and * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. */ if (unspare) { ASSERT(cvd->vdev_isspare); spa_spare_remove(cvd); unspare_guid = cvd->vdev_guid; (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); cvd->vdev_unspare = B_TRUE; } /* * If the parent mirror/replacing vdev only has one child, * the parent is no longer needed. Remove it from the tree. */ if (pvd->vdev_children == 1) { if (pvd->vdev_ops == &vdev_spare_ops) cvd->vdev_unspare = B_FALSE; vdev_remove_parent(cvd); } /* * We don't set tvd until now because the parent we just removed * may have been the previous top-level vdev. */ tvd = cvd->vdev_top; ASSERT(tvd->vdev_parent == rvd); /* * Reevaluate the parent vdev state. */ vdev_propagate_state(cvd); /* * If the 'autoexpand' property is set on the pool then automatically * try to expand the size of the pool. For example if the device we * just detached was smaller than the others, it may be possible to * add metaslabs (i.e. grow the pool). We need to reopen the vdev * first so that we can obtain the updated sizes of the leaf vdevs. */ if (spa->spa_autoexpand) { vdev_reopen(tvd); vdev_expand(tvd, txg); } vdev_config_dirty(tvd); /* * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that * vd->vdev_detached is set and free vd's DTL object in syncing context. * But first make sure we're not on any *other* txg's DTL list, to * prevent vd from being accessed after it's freed. */ vdpath = spa_strdup(vd->vdev_path); for (int t = 0; t < TXG_SIZE; t++) (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); vd->vdev_detached = B_TRUE; vdev_dirty(tvd, VDD_DTL, vd, txg); spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); /* hang on to the spa before we release the lock */ spa_open_ref(spa, FTAG); error = spa_vdev_exit(spa, vd, txg, 0); spa_history_log_internal(spa, "detach", NULL, "vdev=%s", vdpath); spa_strfree(vdpath); /* * If this was the removal of the original device in a hot spare vdev, * then we want to go through and remove the device from the hot spare * list of every other pool. */ if (unspare) { spa_t *altspa = NULL; mutex_enter(&spa_namespace_lock); while ((altspa = spa_next(altspa)) != NULL) { if (altspa->spa_state != POOL_STATE_ACTIVE || altspa == spa) continue; spa_open_ref(altspa, FTAG); mutex_exit(&spa_namespace_lock); (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); mutex_enter(&spa_namespace_lock); spa_close(altspa, FTAG); } mutex_exit(&spa_namespace_lock); /* search the rest of the vdevs for spares to remove */ spa_vdev_resilver_done(spa); } /* all done with the spa; OK to release */ mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); mutex_exit(&spa_namespace_lock); return (error); } /* * Split a set of devices from their mirrors, and create a new pool from them. 
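* The split proceeds in stages: validate the request, offline one side * of each mirror, assemble the new pool from those disks, and finally * detach them from the original pool.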
*/ int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, nvlist_t *props, boolean_t exp) { int error = 0; uint64_t txg, *glist; spa_t *newspa; uint_t c, children, lastlog; nvlist_t **child, *nvl, *tmp; dmu_tx_t *tx; char *altroot = NULL; vdev_t *rvd, **vml = NULL; /* vdev modify list */ boolean_t activate_slog; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); /* clear the log and flush everything up to now */ activate_slog = spa_passivate_log(spa); (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); error = spa_offline_log(spa); txg = spa_vdev_config_enter(spa); if (activate_slog) spa_activate_log(spa); if (error != 0) return (spa_vdev_exit(spa, NULL, txg, error)); /* check new spa name before going any further */ if (spa_lookup(newname) != NULL) return (spa_vdev_exit(spa, NULL, txg, EEXIST)); /* * scan through all the children to ensure they're all mirrors */ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); /* first, check to ensure we've got the right child count */ rvd = spa->spa_root_vdev; lastlog = 0; for (c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; /* don't count the holes & logs as children */ if (vd->vdev_islog || vd->vdev_ishole) { if (lastlog == 0) lastlog = c; continue; } lastlog = 0; } if (children != (lastlog != 0 ? lastlog : rvd->vdev_children)) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); /* next, ensure no spare or cache devices are part of the split */ if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); /* then, loop over each vdev and validate it */ for (c = 0; c < children; c++) { uint64_t is_hole = 0; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &is_hole); if (is_hole != 0) { if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || spa->spa_root_vdev->vdev_child[c]->vdev_islog) { continue; } else { error = SET_ERROR(EINVAL); break; } } /* which disk is going to be split? 
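* Each child of the supplied split config must name an existing leaf * vdev by GUID; the GUID is looked up and vetted below.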
*/ if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, &glist[c]) != 0) { error = SET_ERROR(EINVAL); break; } /* look it up in the spa */ vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); if (vml[c] == NULL) { error = SET_ERROR(ENODEV); break; } /* make sure there's nothing stopping the split */ if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || vml[c]->vdev_islog || vml[c]->vdev_ishole || vml[c]->vdev_isspare || vml[c]->vdev_isl2cache || !vdev_writeable(vml[c]) || vml[c]->vdev_children != 0 || vml[c]->vdev_state != VDEV_STATE_HEALTHY || c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { error = SET_ERROR(EINVAL); break; } if (vdev_dtl_required(vml[c])) { error = SET_ERROR(EBUSY); break; } /* we need certain info from the top level */ VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, vml[c]->vdev_top->vdev_ms_array) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, vml[c]->vdev_top->vdev_ms_shift) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, vml[c]->vdev_top->vdev_asize) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, vml[c]->vdev_top->vdev_ashift) == 0); /* transfer per-vdev ZAPs */ ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0); VERIFY0(nvlist_add_uint64(child[c], ZPOOL_CONFIG_VDEV_LEAF_ZAP, vml[c]->vdev_leaf_zap)); ASSERT3U(vml[c]->vdev_top->vdev_top_zap, !=, 0); VERIFY0(nvlist_add_uint64(child[c], ZPOOL_CONFIG_VDEV_TOP_ZAP, vml[c]->vdev_parent->vdev_top_zap)); } if (error != 0) { kmem_free(vml, children * sizeof (vdev_t *)); kmem_free(glist, children * sizeof (uint64_t)); return (spa_vdev_exit(spa, NULL, txg, error)); } /* stop writers from using the disks */ for (c = 0; c < children; c++) { if (vml[c] != NULL) vml[c]->vdev_offline = B_TRUE; } vdev_reopen(spa->spa_root_vdev); /* * Temporarily record the splitting vdevs in the spa config. This * will disappear once the config is regenerated. */ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, glist, children) == 0); kmem_free(glist, children * sizeof (uint64_t)); mutex_enter(&spa->spa_props_lock); VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, nvl) == 0); mutex_exit(&spa->spa_props_lock); spa->spa_config_splitting = nvl; vdev_config_dirty(spa->spa_root_vdev); /* configure and create the new pool */ VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, exp ? 
POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, spa->spa_config_txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_generate_guid(NULL)) == 0); VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); /* add the new pool to the namespace */ newspa = spa_add(newname, config, altroot); newspa->spa_avz_action = AVZ_ACTION_REBUILD; newspa->spa_config_txg = spa->spa_config_txg; spa_set_log_state(newspa, SPA_LOG_CLEAR); /* release the spa config lock, retaining the namespace lock */ spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 1); spa_activate(newspa, spa_mode_global); spa_async_suspend(newspa); #ifndef illumos /* mark that we are creating new spa by splitting */ newspa->spa_splitting_newspa = B_TRUE; #endif /* create the new pool from the disks of the original pool */ error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); #ifndef illumos newspa->spa_splitting_newspa = B_FALSE; #endif if (error) goto out; /* if that worked, generate a real config for the new pool */ if (newspa->spa_root_vdev != NULL) { VERIFY(nvlist_alloc(&newspa->spa_config_splitting, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, B_TRUE)); } /* set the props */ if (props != NULL) { spa_configfile_set(newspa, props, B_FALSE); error = spa_prop_set(newspa, props); if (error) goto out; } /* flush everything */ txg = spa_vdev_config_enter(newspa); vdev_config_dirty(newspa->spa_root_vdev); (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 2); spa_async_resume(newspa); /* finally, update the original pool's config */ txg = spa_vdev_config_enter(spa); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); error = dmu_tx_assign(tx, TXG_WAIT); if (error != 0) dmu_tx_abort(tx); for (c = 0; c < children; c++) { if (vml[c] != NULL) { vdev_split(vml[c]); if (error == 0) spa_history_log_internal(spa, "detach", tx, "vdev=%s", vml[c]->vdev_path); vdev_free(vml[c]); } } spa->spa_avz_action = AVZ_ACTION_REBUILD; vdev_config_dirty(spa->spa_root_vdev); spa->spa_config_splitting = NULL; nvlist_free(nvl); if (error == 0) dmu_tx_commit(tx); (void) spa_vdev_exit(spa, NULL, txg, 0); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 3); /* split is complete; log a history record */ spa_history_log_internal(newspa, "split", NULL, "from pool %s", spa_name(spa)); kmem_free(vml, children * sizeof (vdev_t *)); /* if we're not going to mount the filesystems in userland, export */ if (exp) error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, B_FALSE, B_FALSE); return (error); out: spa_unload(newspa); spa_deactivate(newspa); spa_remove(newspa); txg = spa_vdev_config_enter(spa); /* re-online all offlined disks */ for (c = 0; c < children; c++) { if (vml[c] != NULL) vml[c]->vdev_offline = B_FALSE; } vdev_reopen(spa->spa_root_vdev); nvlist_free(spa->spa_config_splitting); spa->spa_config_splitting = NULL; (void) spa_vdev_exit(spa, NULL, txg, error); kmem_free(vml, children * sizeof (vdev_t *)); return (error); } static nvlist_t * spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, 
uint64_t target_guid) { for (int i = 0; i < count; i++) { uint64_t guid; VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, &guid) == 0); if (guid == target_guid) return (nvpp[i]); } return (NULL); } static void spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, nvlist_t *dev_to_remove) { nvlist_t **newdev = NULL; if (count > 1) newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); for (int i = 0, j = 0; i < count; i++) { if (dev[i] == dev_to_remove) continue; VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); } VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); for (int i = 0; i < count - 1; i++) nvlist_free(newdev[i]); if (count > 1) kmem_free(newdev, (count - 1) * sizeof (void *)); } /* * Evacuate the device. */ static int spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) { uint64_t txg; int error = 0; ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); ASSERT(vd == vd->vdev_top); /* * Evacuate the device. We don't hold the config lock as writer * since we need to do I/O but we do keep the * spa_namespace_lock held. Once this completes the device * should no longer have any blocks allocated on it. */ if (vd->vdev_islog) { if (vd->vdev_stat.vs_alloc != 0) error = spa_offline_log(spa); } else { error = SET_ERROR(ENOTSUP); } if (error) return (error); /* * The evacuation succeeded. Remove any remaining MOS metadata * associated with this vdev, and wait for these changes to sync. */ ASSERT0(vd->vdev_stat.vs_alloc); txg = spa_vdev_config_enter(spa); vd->vdev_removing = B_TRUE; vdev_dirty_leaves(vd, VDD_DTL, txg); vdev_config_dirty(vd); spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); return (0); } /* * Complete the removal by cleaning up the namespace. */ static void spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) { vdev_t *rvd = spa->spa_root_vdev; uint64_t id = vd->vdev_id; boolean_t last_vdev = (id == (rvd->vdev_children - 1)); ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ASSERT(vd == vd->vdev_top); /* * Only remove any devices which are empty. */ if (vd->vdev_stat.vs_alloc != 0) return; (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); if (list_link_active(&vd->vdev_state_dirty_node)) vdev_state_clean(vd); if (list_link_active(&vd->vdev_config_dirty_node)) vdev_config_clean(vd); vdev_free(vd); if (last_vdev) { vdev_compact_children(rvd); } else { vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); vdev_add_child(rvd, vd); } vdev_config_dirty(rvd); /* * Reassess the health of our root vdev. */ vdev_reopen(rvd); } /* * Remove a device from the pool - * * Removing a device from the vdev namespace requires several steps * and can take a significant amount of time. As a result we use * the spa_vdev_config_[enter/exit] functions which allow us to * grab and release the spa_config_lock while still holding the namespace * lock. During each step the configuration is synced out. * * Currently, this supports removing only hot spares, slogs, and level 2 ARC * devices. 
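* A request to remove any other kind of vdev falls through to the * ENOTSUP case below.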
*/ int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) { vdev_t *vd; sysevent_t *ev = NULL; metaslab_group_t *mg; nvlist_t **spares, **l2cache, *nv; uint64_t txg = 0; uint_t nspares, nl2cache; int error = 0; boolean_t locked = MUTEX_HELD(&spa_namespace_lock); ASSERT(spa_writeable(spa)); if (!locked) txg = spa_vdev_enter(spa); vd = spa_lookup_by_guid(spa, guid, B_FALSE); if (spa->spa_spares.sav_vdevs != NULL && nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { /* * Only remove the hot spare if it's not currently in use * in this pool. */ if (vd == NULL || unspare) { if (vd == NULL) vd = spa_lookup_by_guid(spa, guid, B_TRUE); ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_AUX); spa_vdev_remove_aux(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares, nv); spa_load_spares(spa); spa->spa_spares.sav_sync = B_TRUE; } else { error = SET_ERROR(EBUSY); } } else if (spa->spa_l2cache.sav_vdevs != NULL && nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { /* * Cache devices can always be removed. */ vd = spa_lookup_by_guid(spa, guid, B_TRUE); ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_AUX); spa_vdev_remove_aux(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); spa_load_l2cache(spa); spa->spa_l2cache.sav_sync = B_TRUE; } else if (vd != NULL && vd->vdev_islog) { ASSERT(!locked); ASSERT(vd == vd->vdev_top); mg = vd->vdev_mg; /* * Stop allocating from this vdev. */ metaslab_group_passivate(mg); /* * Wait for the youngest allocations and frees to sync, * and then wait for the deferral of those frees to finish. */ spa_vdev_config_exit(spa, NULL, txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); /* * Attempt to evacuate the vdev. */ error = spa_vdev_remove_evacuate(spa, vd); txg = spa_vdev_config_enter(spa); /* * If we couldn't evacuate the vdev, unwind. */ if (error) { metaslab_group_activate(mg); return (spa_vdev_exit(spa, NULL, txg, error)); } /* * Clean up the vdev namespace. */ ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_DEV); spa_vdev_remove_from_namespace(spa, vd); } else if (vd != NULL) { /* * Normal vdevs cannot be removed (yet). */ error = SET_ERROR(ENOTSUP); } else { /* * There is no vdev of any kind with the specified guid. */ error = SET_ERROR(ENOENT); } if (!locked) error = spa_vdev_exit(spa, NULL, txg, error); if (ev) spa_event_post(ev); return (error); } /* * Find any device that's done replacing, or a vdev marked 'unspare' that's * currently spared, so we can detach it. */ static vdev_t * spa_vdev_resilver_done_hunt(vdev_t *vd) { vdev_t *newvd, *oldvd; for (int c = 0; c < vd->vdev_children; c++) { oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); if (oldvd != NULL) return (oldvd); } /* * Check for a completed replacement. We always consider the first * vdev in the list to be the oldest vdev, and the last one to be * the newest (see spa_vdev_attach() for how that works). In * the case where the newest vdev is faulted, we will not automatically * remove it after a resilver completes. This is OK as it will require * user intervention to determine which disk the admin wishes to keep. 
*/ if (vd->vdev_ops == &vdev_replacing_ops) { ASSERT(vd->vdev_children > 1); newvd = vd->vdev_child[vd->vdev_children - 1]; oldvd = vd->vdev_child[0]; if (vdev_dtl_empty(newvd, DTL_MISSING) && vdev_dtl_empty(newvd, DTL_OUTAGE) && !vdev_dtl_required(oldvd)) return (oldvd); } /* * Check for a completed resilver with the 'unspare' flag set. */ if (vd->vdev_ops == &vdev_spare_ops) { vdev_t *first = vd->vdev_child[0]; vdev_t *last = vd->vdev_child[vd->vdev_children - 1]; if (last->vdev_unspare) { oldvd = first; newvd = last; } else if (first->vdev_unspare) { oldvd = last; newvd = first; } else { oldvd = NULL; } if (oldvd != NULL && vdev_dtl_empty(newvd, DTL_MISSING) && vdev_dtl_empty(newvd, DTL_OUTAGE) && !vdev_dtl_required(oldvd)) return (oldvd); /* * If there are more than two spares attached to a disk, * and those spares are not required, then we want to * attempt to free them up now so that they can be used * by other pools. Once we're back down to a single * disk+spare, we stop removing them. */ if (vd->vdev_children > 2) { newvd = vd->vdev_child[1]; if (newvd->vdev_isspare && last->vdev_isspare && vdev_dtl_empty(last, DTL_MISSING) && vdev_dtl_empty(last, DTL_OUTAGE) && !vdev_dtl_required(newvd)) return (newvd); } } return (NULL); } static void spa_vdev_resilver_done(spa_t *spa) { vdev_t *vd, *pvd, *ppvd; uint64_t guid, sguid, pguid, ppguid; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { pvd = vd->vdev_parent; ppvd = pvd->vdev_parent; guid = vd->vdev_guid; pguid = pvd->vdev_guid; ppguid = ppvd->vdev_guid; sguid = 0; /* * If we have just finished replacing a hot spared device, then * we need to detach the parent's first child (the original hot * spare) as well. */ if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 && ppvd->vdev_children == 2) { ASSERT(pvd->vdev_ops == &vdev_replacing_ops); sguid = ppvd->vdev_child[1]->vdev_guid; } ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd)); spa_config_exit(spa, SCL_ALL, FTAG); if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) return; if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) return; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); } spa_config_exit(spa, SCL_ALL, FTAG); } /* * Update the stored path or FRU for this vdev. */ int spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, boolean_t ispath) { vdev_t *vd; boolean_t sync = B_FALSE; ASSERT(spa_writeable(spa)); spa_vdev_state_enter(spa, SCL_ALL); if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) return (spa_vdev_state_exit(spa, NULL, ENOENT)); if (!vd->vdev_ops->vdev_op_leaf) return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); if (ispath) { if (strcmp(value, vd->vdev_path) != 0) { spa_strfree(vd->vdev_path); vd->vdev_path = spa_strdup(value); sync = B_TRUE; } } else { if (vd->vdev_fru == NULL) { vd->vdev_fru = spa_strdup(value); sync = B_TRUE; } else if (strcmp(value, vd->vdev_fru) != 0) { spa_strfree(vd->vdev_fru); vd->vdev_fru = spa_strdup(value); sync = B_TRUE; } } return (spa_vdev_state_exit(spa, sync ? 
vd : NULL, 0)); } int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) { return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); } int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) { return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); } /* * ========================================================================== * SPA Scanning * ========================================================================== */ int spa_scan_stop(spa_t *spa) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (dsl_scan_resilvering(spa->spa_dsl_pool)) return (SET_ERROR(EBUSY)); return (dsl_scan_cancel(spa->spa_dsl_pool)); } int spa_scan(spa_t *spa, pool_scan_func_t func) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) return (SET_ERROR(ENOTSUP)); /* * If a resilver was requested, but there is no DTL on a * writeable leaf device, we have nothing to do. */ if (func == POOL_SCAN_RESILVER && !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); return (0); } return (dsl_scan(spa->spa_dsl_pool, func)); } /* * ========================================================================== * SPA async task processing * ========================================================================== */ static void spa_async_remove(spa_t *spa, vdev_t *vd) { if (vd->vdev_remove_wanted) { vd->vdev_remove_wanted = B_FALSE; vd->vdev_delayed_close = B_FALSE; vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); /* * We want to clear the stats, but we don't want to do a full * vdev_clear() as that will cause us to throw away * degraded/faulted state as well as attempt to reopen the * device, all of which is a waste. */ vd->vdev_stat.vs_read_errors = 0; vd->vdev_stat.vs_write_errors = 0; vd->vdev_stat.vs_checksum_errors = 0; vdev_state_dirty(vd->vdev_top); /* Tell userspace that the vdev is gone. */ zfs_post_remove(spa, vd); } for (int c = 0; c < vd->vdev_children; c++) spa_async_remove(spa, vd->vdev_child[c]); } static void spa_async_probe(spa_t *spa, vdev_t *vd) { if (vd->vdev_probe_wanted) { vd->vdev_probe_wanted = B_FALSE; vdev_reopen(vd); /* vdev_open() does the actual probe */ } for (int c = 0; c < vd->vdev_children; c++) spa_async_probe(spa, vd->vdev_child[c]); } static void spa_async_autoexpand(spa_t *spa, vdev_t *vd) { sysevent_id_t eid; nvlist_t *attr; char *physpath; if (!spa->spa_autoexpand) return; for (int c = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; spa_async_autoexpand(spa, cvd); } if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) return; physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, ESC_ZFS_VDEV_AUTOEXPAND, attr, &eid, DDI_SLEEP); nvlist_free(attr); kmem_free(physpath, MAXPATHLEN); } static void spa_async_thread(void *arg) { spa_t *spa = arg; int tasks; ASSERT(spa->spa_sync_on); mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; spa->spa_async_tasks &= SPA_ASYNC_REMOVE; mutex_exit(&spa->spa_async_lock); /* * See if the config needs to be updated. 
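* A config update can grow the pool, so the usable space before and * after the update is compared below and any growth is logged to the * pool history.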
*/ if (tasks & SPA_ASYNC_CONFIG_UPDATE) { uint64_t old_space, new_space; mutex_enter(&spa_namespace_lock); old_space = metaslab_class_get_space(spa_normal_class(spa)); spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); new_space = metaslab_class_get_space(spa_normal_class(spa)); mutex_exit(&spa_namespace_lock); /* * If the pool grew as a result of the config update, * then log an internal history event. */ if (new_space != old_space) { spa_history_log_internal(spa, "vdev online", NULL, "pool '%s' size: %llu(+%llu)", spa_name(spa), new_space, new_space - old_space); } } if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa_async_autoexpand(spa, spa->spa_root_vdev); spa_config_exit(spa, SCL_CONFIG, FTAG); } /* * See if any devices need to be probed. */ if (tasks & SPA_ASYNC_PROBE) { spa_vdev_state_enter(spa, SCL_NONE); spa_async_probe(spa, spa->spa_root_vdev); (void) spa_vdev_state_exit(spa, NULL, 0); } /* * If any devices are done replacing, detach them. */ if (tasks & SPA_ASYNC_RESILVER_DONE) spa_vdev_resilver_done(spa); /* * Kick off a resilver. */ if (tasks & SPA_ASYNC_RESILVER) dsl_resilver_restart(spa->spa_dsl_pool, 0); /* * Let the world know that we're done. */ mutex_enter(&spa->spa_async_lock); spa->spa_async_thread = NULL; cv_broadcast(&spa->spa_async_cv); mutex_exit(&spa->spa_async_lock); thread_exit(); } static void spa_async_thread_vd(void *arg) { spa_t *spa = arg; int tasks; ASSERT(spa->spa_sync_on); mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; retry: spa->spa_async_tasks &= ~SPA_ASYNC_REMOVE; mutex_exit(&spa->spa_async_lock); /* * See if any devices need to be marked REMOVED. */ if (tasks & SPA_ASYNC_REMOVE) { spa_vdev_state_enter(spa, SCL_NONE); spa_async_remove(spa, spa->spa_root_vdev); for (int i = 0; i < spa->spa_l2cache.sav_count; i++) spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); for (int i = 0; i < spa->spa_spares.sav_count; i++) spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); (void) spa_vdev_state_exit(spa, NULL, 0); } /* * Let the world know that we're done. 
*/ mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; if ((tasks & SPA_ASYNC_REMOVE) != 0) goto retry; spa->spa_async_thread_vd = NULL; cv_broadcast(&spa->spa_async_cv); mutex_exit(&spa->spa_async_lock); thread_exit(); } void spa_async_suspend(spa_t *spa) { mutex_enter(&spa->spa_async_lock); spa->spa_async_suspended++; /* Wait until both async threads have exited. */ while (spa->spa_async_thread != NULL || spa->spa_async_thread_vd != NULL) cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); mutex_exit(&spa->spa_async_lock); } void spa_async_resume(spa_t *spa) { mutex_enter(&spa->spa_async_lock); ASSERT(spa->spa_async_suspended != 0); spa->spa_async_suspended--; mutex_exit(&spa->spa_async_lock); } static boolean_t spa_async_tasks_pending(spa_t *spa) { uint_t non_config_tasks; uint_t config_task; boolean_t config_task_suspended; non_config_tasks = spa->spa_async_tasks & ~(SPA_ASYNC_CONFIG_UPDATE | SPA_ASYNC_REMOVE); config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE; if (spa->spa_ccw_fail_time == 0) { config_task_suspended = B_FALSE; } else { config_task_suspended = (gethrtime() - spa->spa_ccw_fail_time) < (zfs_ccw_retry_interval * NANOSEC); } return (non_config_tasks || (config_task && !config_task_suspended)); } static void spa_async_dispatch(spa_t *spa) { mutex_enter(&spa->spa_async_lock); if (spa_async_tasks_pending(spa) && !spa->spa_async_suspended && spa->spa_async_thread == NULL && rootdir != NULL) spa->spa_async_thread = thread_create(NULL, 0, spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); } static void spa_async_dispatch_vd(spa_t *spa) { mutex_enter(&spa->spa_async_lock); if ((spa->spa_async_tasks & SPA_ASYNC_REMOVE) != 0 && !spa->spa_async_suspended && spa->spa_async_thread_vd == NULL && rootdir != NULL) spa->spa_async_thread_vd = thread_create(NULL, 0, spa_async_thread_vd, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); } void spa_async_request(spa_t *spa, int task) { zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task); mutex_enter(&spa->spa_async_lock); spa->spa_async_tasks |= task; mutex_exit(&spa->spa_async_lock); spa_async_dispatch_vd(spa); } /* * ========================================================================== * SPA syncing routines * ========================================================================== */ static int bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { bpobj_t *bpo = arg; bpobj_enqueue(bpo, bp, tx); return (0); } static int spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { zio_t *zio = arg; zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, BP_GET_PSIZE(bp), zio->io_flags)); return (0); } /* * Note: this simple function is not inlined to make it easier to dtrace the * amount of time spent syncing frees. */ static void spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx) { zio_t *zio = zio_root(spa, NULL, NULL, 0); bplist_iterate(bpl, spa_free_sync_cb, zio, tx); VERIFY(zio_wait(zio) == 0); } /* * Note: this simple function is not inlined to make it easier to dtrace the * amount of time spent syncing deferred frees.
*/ static void spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx) { zio_t *zio = zio_root(spa, NULL, NULL, 0); VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj, spa_free_sync_cb, zio, tx), ==, 0); VERIFY0(zio_wait(zio)); } static void spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) { char *packed = NULL; size_t bufsize; size_t nvsize = 0; dmu_buf_t *db; VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); /* * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration * information. This avoids the dmu_buf_will_dirty() path and * saves us a pre-read to get data we don't actually care about. */ bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE); packed = kmem_alloc(bufsize, KM_SLEEP); VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0); bzero(packed + nvsize, bufsize - nvsize); dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); kmem_free(packed, bufsize); VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = nvsize; dmu_buf_rele(db, FTAG); } static void spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, const char *config, const char *entry) { nvlist_t *nvroot; nvlist_t **list; int i; if (!sav->sav_sync) return; /* * Update the MOS nvlist describing the list of available devices. * spa_validate_aux() will have already made sure this nvlist is * valid and the vdevs are labeled appropriately. */ if (sav->sav_object == 0) { sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); VERIFY(zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, &sav->sav_object, tx) == 0); } VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (sav->sav_count == 0) { VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); } else { list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < sav->sav_count; i++) list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_FALSE, VDEV_CONFIG_L2CACHE); VERIFY(nvlist_add_nvlist_array(nvroot, config, list, sav->sav_count) == 0); for (i = 0; i < sav->sav_count; i++) nvlist_free(list[i]); kmem_free(list, sav->sav_count * sizeof (void *)); } spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); nvlist_free(nvroot); sav->sav_sync = B_FALSE; } /* * Rebuild spa's all-vdev ZAP from the vdev ZAPs indicated in each vdev_t. * The all-vdev ZAP must be empty. */ static void spa_avz_build(vdev_t *vd, uint64_t avz, dmu_tx_t *tx) { spa_t *spa = vd->vdev_spa; if (vd->vdev_top_zap != 0) { VERIFY0(zap_add_int(spa->spa_meta_objset, avz, vd->vdev_top_zap, tx)); } if (vd->vdev_leaf_zap != 0) { VERIFY0(zap_add_int(spa->spa_meta_objset, avz, vd->vdev_leaf_zap, tx)); } for (uint64_t i = 0; i < vd->vdev_children; i++) { spa_avz_build(vd->vdev_child[i], avz, tx); } } static void spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) { nvlist_t *config; /* * If the pool is being imported from a pre-per-vdev-ZAP version of ZFS, * its config may not be dirty but we still need to build per-vdev ZAPs. * Similarly, if the pool is being assembled (e.g. after a split), we * need to rebuild the AVZ although the config may not be dirty. 
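* The rebuild case below constructs a fresh AVZ, destroys any per-vdev * ZAPs listed only in the old one, and then installs the new AVZ in the * MOS directory object.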
*/ if (list_is_empty(&spa->spa_config_dirty_list) && spa->spa_avz_action == AVZ_ACTION_NONE) return; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); ASSERT(spa->spa_avz_action == AVZ_ACTION_NONE || spa->spa_all_vdev_zaps != 0); if (spa->spa_avz_action == AVZ_ACTION_REBUILD) { /* Make and build the new AVZ */ uint64_t new_avz = zap_create(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); spa_avz_build(spa->spa_root_vdev, new_avz, tx); /* Diff old AVZ with new one */ zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_all_vdev_zaps); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t vdzap = za.za_first_integer; if (zap_lookup_int(spa->spa_meta_objset, new_avz, vdzap) == ENOENT) { /* * ZAP is listed in old AVZ but not in new one; * destroy it */ VERIFY0(zap_destroy(spa->spa_meta_objset, vdzap, tx)); } } zap_cursor_fini(&zc); /* Destroy the old AVZ */ VERIFY0(zap_destroy(spa->spa_meta_objset, spa->spa_all_vdev_zaps, tx)); /* Replace the old AVZ in the dir obj with the new one */ VERIFY0(zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, sizeof (new_avz), 1, &new_avz, tx)); spa->spa_all_vdev_zaps = new_avz; } else if (spa->spa_avz_action == AVZ_ACTION_DESTROY) { zap_cursor_t zc; zap_attribute_t za; /* Walk through the AVZ and destroy all listed ZAPs */ for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_all_vdev_zaps); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t zap = za.za_first_integer; VERIFY0(zap_destroy(spa->spa_meta_objset, zap, tx)); } zap_cursor_fini(&zc); /* Destroy and unlink the AVZ itself */ VERIFY0(zap_destroy(spa->spa_meta_objset, spa->spa_all_vdev_zaps, tx)); VERIFY0(zap_remove(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx)); spa->spa_all_vdev_zaps = 0; } if (spa->spa_all_vdev_zaps == 0) { spa->spa_all_vdev_zaps = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx); } spa->spa_avz_action = AVZ_ACTION_NONE; /* Create ZAPs for vdevs that don't have them. */ vdev_construct_zaps(spa->spa_root_vdev, tx); config = spa_config_generate(spa, spa->spa_root_vdev, dmu_tx_get_txg(tx), B_FALSE); /* * If we're upgrading the spa version then make sure that * the config object gets updated with the correct version. */ if (spa->spa_ubsync.ub_version < spa->spa_uberblock.ub_version) fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa->spa_uberblock.ub_version); spa_config_exit(spa, SCL_STATE, FTAG); nvlist_free(spa->spa_config_syncing); spa->spa_config_syncing = config; spa_sync_nvlist(spa, spa->spa_config_object, config, tx); } static void spa_sync_version(void *arg, dmu_tx_t *tx) { uint64_t *versionp = arg; uint64_t version = *versionp; spa_t *spa = dmu_tx_pool(tx)->dp_spa; /* * Setting the version is special cased when first creating the pool. */ ASSERT(tx->tx_txg != TXG_INITIAL); ASSERT(SPA_VERSION_IS_SUPPORTED(version)); ASSERT(version >= spa_version(spa)); spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); spa_history_log_internal(spa, "set", tx, "version=%lld", version); } /* * Set zpool properties. 
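* Each nvpair is dispatched below: feature@ properties enable the * corresponding feature, a handful of non-persistent or already-synced * properties are special-cased, and everything else is written to the * pool properties ZAP object, with frequently used values mirrored in * the in-core spa_t.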
*/ static void spa_sync_props(void *arg, dmu_tx_t *tx) { nvlist_t *nvp = arg; spa_t *spa = dmu_tx_pool(tx)->dp_spa; objset_t *mos = spa->spa_meta_objset; nvpair_t *elem = NULL; mutex_enter(&spa->spa_props_lock); while ((elem = nvlist_next_nvpair(nvp, elem))) { uint64_t intval; char *strval, *fname; zpool_prop_t prop; const char *propname; zprop_type_t proptype; spa_feature_t fid; switch (prop = zpool_name_to_prop(nvpair_name(elem))) { case ZPROP_INVAL: /* * We checked this earlier in spa_prop_validate(). */ ASSERT(zpool_prop_feature(nvpair_name(elem))); fname = strchr(nvpair_name(elem), '@') + 1; VERIFY0(zfeature_lookup_name(fname, &fid)); spa_feature_enable(spa, fid, tx); spa_history_log_internal(spa, "set", tx, "%s=enabled", nvpair_name(elem)); break; case ZPOOL_PROP_VERSION: intval = fnvpair_value_uint64(elem); /* * The version is synced separately before other * properties and should be correct by now. */ ASSERT3U(spa_version(spa), >=, intval); break; case ZPOOL_PROP_ALTROOT: /* * 'altroot' is a non-persistent property. It should * have been set temporarily at creation or import time. */ ASSERT(spa->spa_root != NULL); break; case ZPOOL_PROP_READONLY: case ZPOOL_PROP_CACHEFILE: /* * 'readonly' and 'cachefile' are also non-persistent * properties. */ break; case ZPOOL_PROP_COMMENT: strval = fnvpair_value_string(elem); if (spa->spa_comment != NULL) spa_strfree(spa->spa_comment); spa->spa_comment = spa_strdup(strval); /* * We need to dirty the configuration on all the vdevs * so that their labels get updated. It's unnecessary * to do this for pool creation since the vdev's * configuration has already been dirtied. */ if (tx->tx_txg != TXG_INITIAL) vdev_config_dirty(spa->spa_root_vdev); spa_history_log_internal(spa, "set", tx, "%s=%s", nvpair_name(elem), strval); break; default: /* * Set pool property values in the poolprops mos object. */ if (spa->spa_pool_props_object == 0) { spa->spa_pool_props_object = zap_create_link(mos, DMU_OT_POOL_PROPS, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, tx); } /* normalize the property name */ propname = zpool_prop_to_name(prop); proptype = zpool_prop_get_type(prop); if (nvpair_type(elem) == DATA_TYPE_STRING) { ASSERT(proptype == PROP_TYPE_STRING); strval = fnvpair_value_string(elem); VERIFY0(zap_update(mos, spa->spa_pool_props_object, propname, 1, strlen(strval) + 1, strval, tx)); spa_history_log_internal(spa, "set", tx, "%s=%s", nvpair_name(elem), strval); } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { intval = fnvpair_value_uint64(elem); if (proptype == PROP_TYPE_INDEX) { const char *unused; VERIFY0(zpool_prop_index_to_string( prop, intval, &unused)); } VERIFY0(zap_update(mos, spa->spa_pool_props_object, propname, 8, 1, &intval, tx)); spa_history_log_internal(spa, "set", tx, "%s=%lld", nvpair_name(elem), intval); } else { ASSERT(0); /* not allowed */ } switch (prop) { case ZPOOL_PROP_DELEGATION: spa->spa_delegation = intval; break; case ZPOOL_PROP_BOOTFS: spa->spa_bootfs = intval; break; case ZPOOL_PROP_FAILUREMODE: spa->spa_failmode = intval; break; case ZPOOL_PROP_AUTOEXPAND: spa->spa_autoexpand = intval; if (tx->tx_txg != TXG_INITIAL) spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); break; case ZPOOL_PROP_DEDUPDITTO: spa->spa_dedup_ditto = intval; break; default: break; } } } mutex_exit(&spa->spa_props_lock); } /* * Perform one-time upgrade on-disk changes. spa_version() does not * reflect the new version this txg, so there must be no changes this * txg to anything that the upgrade code depends on after it executes.
* Therefore this must be called after dsl_pool_sync() does the sync * tasks. */ static void spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) { dsl_pool_t *dp = spa->spa_dsl_pool; ASSERT(spa->spa_sync_pass == 1); rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { dsl_pool_create_origin(dp, tx); /* Keeping the origin open increases spa_minref */ spa->spa_minref += 3; } if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { dsl_pool_upgrade_clones(dp, tx); } if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES && spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) { dsl_pool_upgrade_dir_clones(dp, tx); /* Keeping the freedir open increases spa_minref */ spa->spa_minref += 3; } if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES && spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { spa_feature_create_zap_objects(spa, tx); } /* * The LZ4_COMPRESS feature's behaviour was changed to activate_on_enable * when the ability to use lz4 compression for metadata was added. * Old pools that have this feature enabled must be upgraded to have * this feature active. */ if (spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { boolean_t lz4_en = spa_feature_is_enabled(spa, SPA_FEATURE_LZ4_COMPRESS); boolean_t lz4_ac = spa_feature_is_active(spa, SPA_FEATURE_LZ4_COMPRESS); if (lz4_en && !lz4_ac) spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx); } /* * If we haven't written the salt, do so now. Note that the * feature may not be activated yet, but that's fine since * the presence of this ZAP entry is backwards compatible. */ if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT) == ENOENT) { VERIFY0(zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, sizeof (spa->spa_cksum_salt.zcs_bytes), spa->spa_cksum_salt.zcs_bytes, tx)); } rrw_exit(&dp->dp_config_rwlock, FTAG); } /* * Sync the specified transaction group. New blocks may be dirtied as * part of the process, so we iterate until it converges. */ void spa_sync(spa_t *spa, uint64_t txg) { dsl_pool_t *dp = spa->spa_dsl_pool; objset_t *mos = spa->spa_meta_objset; bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd; dmu_tx_t *tx; int error; uint32_t max_queue_depth = zfs_vdev_async_write_max_active * zfs_vdev_queue_depth_pct / 100; VERIFY(spa_writeable(spa)); /* * Lock out configuration changes. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa->spa_syncing_txg = txg; spa->spa_sync_pass = 0; mutex_enter(&spa->spa_alloc_lock); VERIFY0(avl_numnodes(&spa->spa_alloc_tree)); mutex_exit(&spa->spa_alloc_lock); /* * If there are any pending vdev state changes, convert them * into config changes that go out with this transaction group. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); while (list_head(&spa->spa_state_dirty_list) != NULL) { /* * We need the write lock here because, for aux vdevs, * calling vdev_config_dirty() modifies sav_config. * This is ugly and will become unnecessary when we * eliminate the aux vdev wart by integrating all vdevs * into the root vdev tree.
*/ spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { vdev_state_clean(vd); vdev_config_dirty(vd); } spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); } spa_config_exit(spa, SCL_STATE, FTAG); tx = dmu_tx_create_assigned(dp, txg); spa->spa_sync_starttime = gethrtime(); #ifdef illumos VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, spa->spa_sync_starttime + spa->spa_deadman_synctime)); #else /* !illumos */ #ifdef _KERNEL callout_reset(&spa->spa_deadman_cycid, hz * spa->spa_deadman_synctime / NANOSEC, spa_deadman, spa); #endif #endif /* illumos */ /* * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, * set spa_deflate if we have no raid-z vdevs. */ if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { int i; for (i = 0; i < rvd->vdev_children; i++) { vd = rvd->vdev_child[i]; if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) break; } if (i == rvd->vdev_children) { spa->spa_deflate = TRUE; VERIFY(0 == zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, sizeof (uint64_t), 1, &spa->spa_deflate, tx)); } } /* * Set the top-level vdev's max queue depth. Evaluate each * top-level's async write queue depth in case it changed. * The max queue depth will not change in the middle of syncing * out this txg. */ uint64_t queue_depth_total = 0; for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (mg == NULL || mg->mg_class != spa_normal_class(spa) || !metaslab_group_initialized(mg)) continue; /* * It is safe to do a lock-free check here because only async * allocations look at mg_max_alloc_queue_depth, and async * allocations all happen from spa_sync(). */ ASSERT0(refcount_count(&mg->mg_alloc_queue_depth)); mg->mg_max_alloc_queue_depth = max_queue_depth; queue_depth_total += mg->mg_max_alloc_queue_depth; } metaslab_class_t *mc = spa_normal_class(spa); ASSERT0(refcount_count(&mc->mc_alloc_slots)); mc->mc_alloc_max_slots = queue_depth_total; mc->mc_alloc_throttle_enabled = zio_dva_throttle_enabled; ASSERT3U(mc->mc_alloc_max_slots, <=, max_queue_depth * rvd->vdev_children); /* * Iterate to convergence. */ do { int pass = ++spa->spa_sync_pass; spa_sync_config_object(spa, tx); spa_sync_aux_dev(spa, &spa->spa_spares, tx, ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); spa_errlog_sync(spa, txg); dsl_pool_sync(dp, txg); if (pass < zfs_sync_pass_deferred_free) { spa_sync_frees(spa, free_bpl, tx); } else { /* * We can not defer frees in pass 1, because * we sync the deferred frees later in pass 1. */ ASSERT3U(pass, >, 1); bplist_iterate(free_bpl, bpobj_enqueue_cb, &spa->spa_deferred_bpobj, tx); } ddt_sync(spa, txg); dsl_scan_sync(dp, tx); while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) vdev_sync(vd, txg); if (pass == 1) { spa_sync_upgrades(spa, tx); ASSERT3U(txg, >=, spa->spa_uberblock.ub_rootbp.blk_birth); /* * Note: We need to check if the MOS is dirty * because we could have marked the MOS dirty * without updating the uberblock (e.g. if we * have sync tasks but no dirty user data). 
We * need to check the uberblock's rootbp because * it is updated if we have synced out dirty * data (though in this case the MOS will most * likely also be dirty due to second order * effects, we don't want to rely on that here). */ if (spa->spa_uberblock.ub_rootbp.blk_birth < txg && !dmu_objset_is_dirty(mos, txg)) { /* * Nothing changed on the first pass, * therefore this TXG is a no-op. Avoid * syncing deferred frees, so that we * can keep this TXG as a no-op. */ ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg)); break; } spa_sync_deferred_frees(spa, tx); } } while (dmu_objset_is_dirty(mos, txg)); if (!list_is_empty(&spa->spa_config_dirty_list)) { /* * Make sure that the number of ZAPs for all the vdevs matches * the number of ZAPs in the per-vdev ZAP list. This only gets * called if the config is dirty; otherwise there may be * outstanding AVZ operations that weren't completed in * spa_sync_config_object. */ uint64_t all_vdev_zap_entry_count; ASSERT0(zap_count(spa->spa_meta_objset, spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count)); ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==, all_vdev_zap_entry_count); } /* * Rewrite the vdev configuration (which includes the uberblock) * to commit the transaction group. * * If there are no dirty vdevs, we sync the uberblock to a few * random top-level vdevs that are known to be visible in the * config cache (see spa_vdev_add() for a complete description). * If there *are* dirty vdevs, sync the uberblock to all vdevs. */ for (;;) { /* * We hold SCL_STATE to prevent vdev open/close/etc. * while we're attempting to write the vdev labels. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); if (list_is_empty(&spa->spa_config_dirty_list)) { vdev_t *svd[SPA_DVAS_PER_BP]; int svdcount = 0; int children = rvd->vdev_children; int c0 = spa_get_random(children); for (int c = 0; c < children; c++) { vd = rvd->vdev_child[(c0 + c) % children]; if (vd->vdev_ms_array == 0 || vd->vdev_islog) continue; svd[svdcount++] = vd; if (svdcount == SPA_DVAS_PER_BP) break; } error = vdev_config_sync(svd, svdcount, txg); } else { error = vdev_config_sync(rvd->vdev_child, rvd->vdev_children, txg); } if (error == 0) spa->spa_last_synced_guid = rvd->vdev_guid; spa_config_exit(spa, SCL_STATE, FTAG); if (error == 0) break; zio_suspend(spa, NULL); zio_resume_wait(spa); } dmu_tx_commit(tx); #ifdef illumos VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY)); #else /* !illumos */ #ifdef _KERNEL callout_drain(&spa->spa_deadman_cycid); #endif #endif /* illumos */ /* * Clear the dirty config list. */ while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) vdev_config_clean(vd); /* * Now that the new config has synced transactionally, * let it become visible to the config cache. */ if (spa->spa_config_syncing != NULL) { spa_config_set(spa, spa->spa_config_syncing); spa->spa_config_txg = txg; spa->spa_config_syncing = NULL; } - spa->spa_ubsync = spa->spa_uberblock; - dsl_pool_sync_done(dp, txg); mutex_enter(&spa->spa_alloc_lock); VERIFY0(avl_numnodes(&spa->spa_alloc_tree)); mutex_exit(&spa->spa_alloc_lock); /* * Update usable space statistics. */ while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) vdev_sync_done(vd, txg); spa_update_dspace(spa); /* * It had better be the case that we didn't dirty anything * since vdev_config_sync(). 
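 */

/*
 * When nothing in the config is dirty, the loop above writes the uberblock
 * to at most SPA_DVAS_PER_BP healthy top-level vdevs, scanning the children
 * from a random starting point so label wear is spread evenly. A standalone
 * sketch of that selection over a plain array; the types are stand-ins and
 * MAX_PICK plays the role of SPA_DVAS_PER_BP.
 */
#include <stddef.h>
#include <stdlib.h>

#define MAX_PICK	3

struct tvd {
	int usable;	/* cf. vdev_ms_array != 0 && !vdev_islog */
};

static size_t
pick_vdevs(struct tvd *child, size_t children, struct tvd **out)
{
	size_t c0 = (size_t)rand() % children;	/* cf. spa_get_random() */
	size_t picked = 0;

	/* Walk every child once, wrapping around from the random start. */
	for (size_t c = 0; c < children && picked < MAX_PICK; c++) {
		struct tvd *vd = &child[(c0 + c) % children];

		if (vd->usable)
			out[picked++] = vd;
	}
	return (picked);
}

/*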
*/ ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); spa->spa_sync_pass = 0; + /* + * Update the last synced uberblock here. We want to do this at + * the end of spa_sync() so that consumers of spa_last_synced_txg() + * will be guaranteed that all the processing associated with + * that txg has been completed. + */ + spa->spa_ubsync = spa->spa_uberblock; spa_config_exit(spa, SCL_CONFIG, FTAG); spa_handle_ignored_writes(spa); /* * If any async tasks have been requested, kick them off. */ spa_async_dispatch(spa); spa_async_dispatch_vd(spa); } /* * Sync all pools. We don't want to hold the namespace lock across these * operations, so we take a reference on the spa_t and drop the lock during the * sync. */ void spa_sync_allpools(void) { spa_t *spa = NULL; mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) { if (spa_state(spa) != POOL_STATE_ACTIVE || !spa_writeable(spa) || spa_suspended(spa)) continue; spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); txg_wait_synced(spa_get_dsl(spa), 0); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); } mutex_exit(&spa_namespace_lock); } /* * ========================================================================== * Miscellaneous routines * ========================================================================== */ /* * Remove all pools in the system. */ void spa_evict_all(void) { spa_t *spa; /* * Remove all cached state. All pools should be closed now, * so every spa in the AVL tree should be unreferenced. */ mutex_enter(&spa_namespace_lock); while ((spa = spa_next(NULL)) != NULL) { /* * Stop async tasks. The async thread may need to detach * a device that's been replaced, which requires grabbing * spa_namespace_lock, so we must drop it here. */ spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); spa_async_suspend(spa); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { spa_unload(spa); spa_deactivate(spa); } spa_remove(spa); } mutex_exit(&spa_namespace_lock); } vdev_t * spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) { vdev_t *vd; int i; if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) return (vd); if (aux) { for (i = 0; i < spa->spa_l2cache.sav_count; i++) { vd = spa->spa_l2cache.sav_vdevs[i]; if (vd->vdev_guid == guid) return (vd); } for (i = 0; i < spa->spa_spares.sav_count; i++) { vd = spa->spa_spares.sav_vdevs[i]; if (vd->vdev_guid == guid) return (vd); } } return (NULL); } void spa_upgrade(spa_t *spa, uint64_t version) { ASSERT(spa_writeable(spa)); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * This should only be called for a non-faulted pool, and since a * future version would result in an unopenable pool, this shouldn't be * possible. 
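 */

/*
 * spa_sync_allpools() above (and spa_evict_all() after it) use the same
 * idiom: never block while holding spa_namespace_lock. Take a reference on
 * the current element, drop the lock for the slow call, then retake it and
 * release the reference. A condensed pthreads sketch of that shape; the
 * list, refcount and slow_operation() are stand-ins, and in the real code
 * it is the reference that keeps the element (and its next pointer) alive
 * while the lock is down.
 */
#include <pthread.h>

struct pool {
	struct pool *next;
	int refcnt;
};

static pthread_mutex_t ns_lock = PTHREAD_MUTEX_INITIALIZER;
static struct pool *pools;

static void
slow_operation(struct pool *p)
{
	(void)p;	/* e.g. txg_wait_synced() on the pool's DSL */
}

static void
sync_all(void)
{
	pthread_mutex_lock(&ns_lock);
	for (struct pool *p = pools; p != NULL; p = p->next) {
		p->refcnt++;			/* cf. spa_open_ref() */
		pthread_mutex_unlock(&ns_lock);	/* never wait under the lock */
		slow_operation(p);
		pthread_mutex_lock(&ns_lock);
		p->refcnt--;			/* cf. spa_close() */
	}
	pthread_mutex_unlock(&ns_lock);
}

/*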
*/ ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version)); ASSERT3U(version, >=, spa->spa_uberblock.ub_version); spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); txg_wait_synced(spa_get_dsl(spa), 0); } boolean_t spa_has_spare(spa_t *spa, uint64_t guid) { int i; uint64_t spareguid; spa_aux_vdev_t *sav = &spa->spa_spares; for (i = 0; i < sav->sav_count; i++) if (sav->sav_vdevs[i]->vdev_guid == guid) return (B_TRUE); for (i = 0; i < sav->sav_npending; i++) { if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, &spareguid) == 0 && spareguid == guid) return (B_TRUE); } return (B_FALSE); } /* * Check if a pool has an active shared spare device. * Note: reference count of an active spare is 2, as a spare and as a replace */ static boolean_t spa_has_active_shared_spare(spa_t *spa) { int i, refcnt; uint64_t pool; spa_aux_vdev_t *sav = &spa->spa_spares; for (i = 0; i < sav->sav_count; i++) { if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, &refcnt) && pool != 0ULL && pool == spa_guid(spa) && refcnt > 2) return (B_TRUE); } return (B_FALSE); } static sysevent_t * spa_event_create(spa_t *spa, vdev_t *vd, const char *name) { sysevent_t *ev = NULL; #ifdef _KERNEL sysevent_attr_list_t *attr = NULL; sysevent_value_t value; ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", SE_SLEEP); ASSERT(ev != NULL); value.value_type = SE_DATA_TYPE_STRING; value.value.sv_string = spa_name(spa); if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) goto done; value.value_type = SE_DATA_TYPE_UINT64; value.value.sv_uint64 = spa_guid(spa); if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) goto done; if (vd) { value.value_type = SE_DATA_TYPE_UINT64; value.value.sv_uint64 = vd->vdev_guid; if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, SE_SLEEP) != 0) goto done; if (vd->vdev_path) { value.value_type = SE_DATA_TYPE_STRING; value.value.sv_string = vd->vdev_path; if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, &value, SE_SLEEP) != 0) goto done; } } if (sysevent_attach_attributes(ev, attr) != 0) goto done; attr = NULL; done: if (attr) sysevent_free_attr(attr); #endif return (ev); } static void spa_event_post(sysevent_t *ev) { #ifdef _KERNEL sysevent_id_t eid; (void) log_sysevent(ev, SE_SLEEP, &eid); sysevent_free(ev); #endif } /* * Post a sysevent corresponding to the given event. The 'name' must be one of * the event definitions in sys/sysevent/eventdefs.h. The payload will be * filled in from the spa and (optionally) the vdev. This doesn't do anything * in the userland libzpool, as we don't want consumers to misinterpret ztest * or zdb as real changes. */ void spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) { spa_event_post(spa_event_create(spa, vd, name)); } Index: projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c =================================================================== --- projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c (revision 309262) +++ projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c (revision 309263) @@ -1,2150 +1,2196 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The zfs intent log (ZIL) saves transaction records of system calls * that change the file system in memory with enough information * to be able to replay them. These are stored in memory until * either the DMU transaction group (txg) commits them to the stable pool * and they can be discarded, or they are flushed to the stable log * (also in the pool) due to a fsync, O_DSYNC or other synchronous * requirement. In the event of a panic or power fail then those log * records (transactions) are replayed. * * There is one ZIL per file system. Its on-disk (pool) format consists * of 3 parts: * * - ZIL header * - ZIL blocks * - ZIL records * * A log record holds a system call transaction. Log blocks can * hold many log records and the blocks are chained together. * Each ZIL block contains a block pointer (blkptr_t) to the next * ZIL block in the chain. The ZIL header points to the first * block in the chain. Note there is not a fixed place in the pool * to hold blocks. They are dynamically allocated and freed as * needed from the blocks available. Figure X shows the ZIL structure: */ /* * Disable intent logging replay. This global ZIL switch affects all pools. */ int zil_replay_disable = 0; SYSCTL_DECL(_vfs_zfs); SYSCTL_INT(_vfs_zfs, OID_AUTO, zil_replay_disable, CTLFLAG_RWTUN, &zil_replay_disable, 0, "Disable intent logging replay"); /* * Tunable parameter for debugging or performance analysis. Setting * zfs_nocacheflush will cause corruption on power loss if a volatile * out-of-order write cache is enabled. */ boolean_t zfs_nocacheflush = B_FALSE; SYSCTL_INT(_vfs_zfs, OID_AUTO, cache_flush_disable, CTLFLAG_RDTUN, &zfs_nocacheflush, 0, "Disable cache flush"); boolean_t zfs_trim_enabled = B_TRUE; SYSCTL_DECL(_vfs_zfs_trim); SYSCTL_INT(_vfs_zfs_trim, OID_AUTO, enabled, CTLFLAG_RDTUN, &zfs_trim_enabled, 0, "Enable ZFS TRIM"); /* * Limit SLOG write size per commit executed with synchronous priority. * Any writes above that executed with lower (asynchronous) priority to * limit potential SLOG device abuse by single active ZIL writer. */ uint64_t zil_slog_limit = 768 * 1024; SYSCTL_QUAD(_vfs_zfs, OID_AUTO, zil_slog_limit, CTLFLAG_RWTUN, &zil_slog_limit, 0, "Maximal SLOG commit size with sync priority"); static kmem_cache_t *zil_lwb_cache; #define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&lwb->lwb_blk) - \ sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused)) /* * ziltest is by and large an ugly hack, but very useful in * checking replay without tedious work. 
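 */

/*
 * The zil_slog_limit tunable above caps how much of one commit is written
 * to a dedicated log device at synchronous priority; zil_lwb_write_init()
 * later in this file demotes further slog writes to asynchronous priority
 * so a single busy ZIL cannot monopolize the device. A standalone sketch
 * of just that decision, with stand-in names for the priorities.
 */
#include <stdint.h>

enum prio { PRIO_SYNC_WRITE, PRIO_ASYNC_WRITE };

static uint64_t slog_limit = 768 * 1024;	/* cf. zil_slog_limit */

static enum prio
lwb_priority(uint64_t cur_used, int on_slog)
{
	/* Main-pool writes, and slog writes under the cap, stay sync. */
	if (cur_used <= slog_limit || !on_slog)
		return (PRIO_SYNC_WRITE);
	return (PRIO_ASYNC_WRITE);
}

/*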
* When running ziltest we want to keep all itx's and so maintain * a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG * We subtract TXG_CONCURRENT_STATES to allow for common code. */ #define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES) static int zil_bp_compare(const void *x1, const void *x2) { const dva_t *dva1 = &((zil_bp_node_t *)x1)->zn_dva; const dva_t *dva2 = &((zil_bp_node_t *)x2)->zn_dva; if (DVA_GET_VDEV(dva1) < DVA_GET_VDEV(dva2)) return (-1); if (DVA_GET_VDEV(dva1) > DVA_GET_VDEV(dva2)) return (1); if (DVA_GET_OFFSET(dva1) < DVA_GET_OFFSET(dva2)) return (-1); if (DVA_GET_OFFSET(dva1) > DVA_GET_OFFSET(dva2)) return (1); return (0); } static void zil_bp_tree_init(zilog_t *zilog) { avl_create(&zilog->zl_bp_tree, zil_bp_compare, sizeof (zil_bp_node_t), offsetof(zil_bp_node_t, zn_node)); } static void zil_bp_tree_fini(zilog_t *zilog) { avl_tree_t *t = &zilog->zl_bp_tree; zil_bp_node_t *zn; void *cookie = NULL; while ((zn = avl_destroy_nodes(t, &cookie)) != NULL) kmem_free(zn, sizeof (zil_bp_node_t)); avl_destroy(t); } int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp) { avl_tree_t *t = &zilog->zl_bp_tree; const dva_t *dva; zil_bp_node_t *zn; avl_index_t where; if (BP_IS_EMBEDDED(bp)) return (0); dva = BP_IDENTITY(bp); if (avl_find(t, dva, &where) != NULL) return (SET_ERROR(EEXIST)); zn = kmem_alloc(sizeof (zil_bp_node_t), KM_SLEEP); zn->zn_dva = *dva; avl_insert(t, zn, where); return (0); } static zil_header_t * zil_header_in_syncing_context(zilog_t *zilog) { return ((zil_header_t *)zilog->zl_header); } static void zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) { zio_cksum_t *zc = &bp->blk_cksum; zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL); zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL); zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); zc->zc_word[ZIL_ZC_SEQ] = 1ULL; } /* * Read a log block and make sure it's valid. */ static int zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, char **end) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf = NULL; zbookmark_phys_t zb; int error; if (zilog->zl_header->zh_claim_txg == 0) zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) zio_flags |= ZIO_FLAG_SPECULATIVE; SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { zio_cksum_t cksum = bp->blk_cksum; /* * Validate the checksummed log block. * * Sequence numbers should be... sequential. The checksum * verifier for the next block should be bp's checksum plus 1. * * Also check the log chain linkage and size used. 
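 */

/*
 * The validation described above relies on the ZIL's self-describing
 * checksum: each block stores in zc_next_blk the checksum its successor
 * must carry, namely this block's checksum with the sequence word bumped
 * by one. A standalone sketch of that successor check; the four-word
 * layout mirrors zio_cksum_t but the types here are stand-ins.
 */
#include <stdint.h>
#include <string.h>

#define ZC_SEQ	3	/* cf. ZIL_ZC_SEQ, the sequence-number word */

struct cksum {
	uint64_t word[4];
};

/* Nonzero iff 'next' legitimately follows the block checksummed 'cur'. */
static int
chain_ok(const struct cksum *cur, const struct cksum *next)
{
	struct cksum want = *cur;

	want.word[ZC_SEQ]++;
	return (memcmp(&want, next, sizeof (want)) == 0);
}

/*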
*/ cksum.zc_word[ZIL_ZC_SEQ]++; if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { zil_chain_t *zilc = abuf->b_data; char *lr = (char *)(zilc + 1); uint64_t len = zilc->zc_nused - sizeof (zil_chain_t); if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) { error = SET_ERROR(ECKSUM); } else { ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE); bcopy(lr, dst, len); *end = (char *)dst + len; *nbp = zilc->zc_next_blk; } } else { char *lr = abuf->b_data; uint64_t size = BP_GET_LSIZE(bp); zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1; if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) || (zilc->zc_nused > (size - sizeof (*zilc)))) { error = SET_ERROR(ECKSUM); } else { ASSERT3U(zilc->zc_nused, <=, SPA_OLD_MAXBLOCKSIZE); bcopy(lr, dst, zilc->zc_nused); *end = (char *)dst + zilc->zc_nused; *nbp = zilc->zc_next_blk; } } arc_buf_destroy(abuf, &abuf); } return (error); } /* * Read a TX_WRITE log data block. */ static int zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; const blkptr_t *bp = &lr->lr_blkptr; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf = NULL; zbookmark_phys_t zb; int error; if (BP_IS_HOLE(bp)) { if (wbuf != NULL) bzero(wbuf, MAX(BP_GET_LSIZE(bp), lr->lr_length)); return (0); } if (zilog->zl_header->zh_claim_txg == 0) zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { if (wbuf != NULL) bcopy(abuf->b_data, wbuf, arc_buf_size(abuf)); arc_buf_destroy(abuf, &abuf); } return (error); } /* * Parse the intent log, and call parse_func for each valid record within. */ int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) { const zil_header_t *zh = zilog->zl_header; boolean_t claimed = !!zh->zh_claim_txg; uint64_t claim_blk_seq = claimed ? zh->zh_claim_blk_seq : UINT64_MAX; uint64_t claim_lr_seq = claimed ? zh->zh_claim_lr_seq : UINT64_MAX; uint64_t max_blk_seq = 0; uint64_t max_lr_seq = 0; uint64_t blk_count = 0; uint64_t lr_count = 0; blkptr_t blk, next_blk; char *lrbuf, *lrp; int error = 0; /* * Old logs didn't record the maximum zh_claim_lr_seq. */ if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) claim_lr_seq = UINT64_MAX; /* * Starting at the block pointed to by zh_log we read the log chain. * For each block in the chain we strongly check that block to * ensure its validity. We stop when an invalid block is found. * For each block pointer in the chain we call parse_blk_func(). * For each record in each valid block we call parse_lr_func(). * If the log has been claimed, stop if we encounter a sequence * number greater than the highest claimed sequence number. 
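 */

/*
 * Inside each valid block, zil_parse() below walks a packed sequence of
 * variable-length records, advancing by each record's lrc_reclen. A
 * standalone sketch of that inner loop; 'struct rec' is a stand-in for
 * lr_t, and the corruption check stands in for the ASSERT on reclen.
 */
#include <stddef.h>
#include <stdint.h>

struct rec {
	uint64_t txtype;
	uint64_t reclen;	/* total record size, header included */
};

typedef int (*rec_cb)(const struct rec *, void *);

/* Apply cb to every record in [buf, buf + len); stop on nonzero. */
static int
walk_records(const char *buf, size_t len, rec_cb cb, void *arg)
{
	const char *p = buf;

	while (p < buf + len) {
		const struct rec *r = (const struct rec *)p;
		int err;

		if (r->reclen < sizeof (*r))	/* corrupt: would never advance */
			return (-1);
		if ((err = cb(r, arg)) != 0)
			return (err);
		p += r->reclen;
	}
	return (0);
}

/*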
*/ lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE); zil_bp_tree_init(zilog); for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) { uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ]; int reclen; char *end; if (blk_seq > claim_blk_seq) break; if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0) break; ASSERT3U(max_blk_seq, <, blk_seq); max_blk_seq = blk_seq; blk_count++; if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) break; error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); if (error != 0) break; for (lrp = lrbuf; lrp < end; lrp += reclen) { lr_t *lr = (lr_t *)lrp; reclen = lr->lrc_reclen; ASSERT3U(reclen, >=, sizeof (lr_t)); if (lr->lrc_seq > claim_lr_seq) goto done; if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0) goto done; ASSERT3U(max_lr_seq, <, lr->lrc_seq); max_lr_seq = lr->lrc_seq; lr_count++; } } done: zilog->zl_parse_error = error; zilog->zl_parse_blk_seq = max_blk_seq; zilog->zl_parse_lr_seq = max_lr_seq; zilog->zl_parse_blk_count = blk_count; zilog->zl_parse_lr_count = lr_count; ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); zil_bp_tree_fini(zilog); zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); return (error); } static int zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg) { /* * Claim log block if not already committed and not already claimed. * If tx == NULL, just verify that the block is claimable. */ if (BP_IS_HOLE(bp) || bp->blk_birth < first_txg || zil_bp_tree_add(zilog, bp) != 0) return (0); return (zio_wait(zio_claim(NULL, zilog->zl_spa, tx == NULL ? 0 : first_txg, bp, spa_claim_notify, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB))); } static int zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg) { lr_write_t *lr = (lr_write_t *)lrc; int error; if (lrc->lrc_txtype != TX_WRITE) return (0); /* * If the block is not readable, don't claim it. This can happen * in normal operation when a log block is written to disk before * some of the dmu_sync() blocks it points to. In this case, the * transaction cannot have been committed to anyone (we would have * waited for all writes to be stable first), so it is semantically * correct to declare this the end of the log. */ if (lr->lr_blkptr.blk_birth >= first_txg && (error = zil_read_log_data(zilog, lr, NULL)) != 0) return (error); return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); } /* ARGSUSED */ static int zil_free_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t claim_txg) { zio_free_zil(zilog->zl_spa, dmu_tx_get_txg(tx), bp); return (0); } static int zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg) { lr_write_t *lr = (lr_write_t *)lrc; blkptr_t *bp = &lr->lr_blkptr; /* * If we previously claimed it, we need to free it. 
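 */

/*
 * The claim and free callbacks above funnel every block pointer through
 * zil_bp_tree_add(), whose EEXIST return is what guarantees each log block
 * is processed exactly once even if the chain is walked again. A standalone
 * insert-if-absent sketch over a sorted array instead of an AVL tree.
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>

#define MAX_SEEN	128

static uint64_t seen[MAX_SEEN];
static size_t nseen;

static int
seen_add(uint64_t id)
{
	size_t lo = 0, hi = nseen;

	while (lo < hi) {			/* binary search */
		size_t mid = (lo + hi) / 2;

		if (seen[mid] < id)
			lo = mid + 1;
		else
			hi = mid;
	}
	if (lo < nseen && seen[lo] == id)
		return (EEXIST);		/* already processed */
	if (nseen == MAX_SEEN)
		return (ENOMEM);
	memmove(&seen[lo + 1], &seen[lo], (nseen - lo) * sizeof (seen[0]));
	seen[lo] = id;
	nseen++;
	return (0);
}

/*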
*/ if (claim_txg != 0 && lrc->lrc_txtype == TX_WRITE && bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp)) zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); return (0); } static lwb_t * zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg) { lwb_t *lwb; lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); lwb->lwb_zilog = zilog; lwb->lwb_blk = *bp; lwb->lwb_slog = slog; lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp)); lwb->lwb_max_txg = txg; lwb->lwb_zio = NULL; lwb->lwb_tx = NULL; if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { lwb->lwb_nused = sizeof (zil_chain_t); lwb->lwb_sz = BP_GET_LSIZE(bp); } else { lwb->lwb_nused = 0; lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t); } mutex_enter(&zilog->zl_lock); list_insert_tail(&zilog->zl_lwb_list, lwb); mutex_exit(&zilog->zl_lock); return (lwb); } /* * Called when we create in-memory log transactions so that we know * to clean up the itxs at the end of spa_sync(). */ void zilog_dirty(zilog_t *zilog, uint64_t txg) { dsl_pool_t *dp = zilog->zl_dmu_pool; dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); if (ds->ds_is_snapshot) panic("dirtying snapshot!"); if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, zilog); } } +/* + * Determine if the zil is dirty in the specified txg. Callers wanting to + * ensure that the dirty state does not change must hold the itxg_lock for + * the specified txg. Holding the lock will ensure that the zil cannot be + * dirtied (zil_itx_assign) or cleaned (zil_clean) while we check its current + * state. + */ boolean_t +zilog_is_dirty_in_txg(zilog_t *zilog, uint64_t txg) +{ + dsl_pool_t *dp = zilog->zl_dmu_pool; + + if (txg_list_member(&dp->dp_dirty_zilogs, zilog, txg & TXG_MASK)) + return (B_TRUE); + return (B_FALSE); +} + +/* + * Determine if the zil is dirty. The zil is considered dirty if it has + * any pending itx records that have not been cleaned by zil_clean(). + */ +boolean_t zilog_is_dirty(zilog_t *zilog) { dsl_pool_t *dp = zilog->zl_dmu_pool; for (int t = 0; t < TXG_SIZE; t++) { if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t)) return (B_TRUE); } return (B_FALSE); } /* * Create an on-disk intent log. */ static lwb_t * zil_create(zilog_t *zilog) { const zil_header_t *zh = zilog->zl_header; lwb_t *lwb = NULL; uint64_t txg = 0; dmu_tx_t *tx = NULL; blkptr_t blk; int error = 0; boolean_t slog = FALSE; /* * Wait for any previous destroy to complete. */ txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); ASSERT(zh->zh_claim_txg == 0); ASSERT(zh->zh_replay_seq == 0); blk = zh->zh_log; /* * Allocate an initial log block if: * - there isn't one already * - the existing block is the wrong endianness */ if (BP_IS_HOLE(&blk) || BP_SHOULD_BYTESWAP(&blk)) { tx = dmu_tx_create(zilog->zl_os); VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); if (!BP_IS_HOLE(&blk)) { zio_free_zil(zilog->zl_spa, txg, &blk); BP_ZERO(&blk); } error = zio_alloc_zil(zilog->zl_spa, txg, &blk, NULL, ZIL_MIN_BLKSZ, &slog); if (error == 0) zil_init_log_chain(zilog, &blk); } /* * Allocate a log write buffer (lwb) for the first log block. */ if (error == 0) lwb = zil_alloc_lwb(zilog, &blk, slog, txg); /* * If we just allocated the first log block, commit our transaction * and wait for zil_sync() to stuff the block pointer into zh_log. * (zh is part of the MOS, so we cannot modify it in open context.)
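 */

/*
 * zilog_dirty() and the new zilog_is_dirty_in_txg() above index per-txg
 * state with txg & TXG_MASK: only TXG_SIZE transaction groups exist at
 * once, so a small power-of-two ring indexed by the txg's low bits
 * suffices, provided each slot records which txg owns it (cf. the
 * itxg_txg check in zil_itx_assign()). A standalone sketch of the idiom
 * with the constants declared locally.
 */
#include <stdint.h>

#define TXG_SIZE	4	/* must stay a power of two */
#define TXG_MASK	(TXG_SIZE - 1)

struct per_txg {
	uint64_t txg;	/* owner of this slot right now */
	int dirty;
};

static struct per_txg slots[TXG_SIZE];

static void
mark_dirty(uint64_t txg)
{
	struct per_txg *s = &slots[txg & TXG_MASK];

	s->txg = txg;	/* a later txg with the same low bits reuses the slot */
	s->dirty = 1;
}

static int
is_dirty(uint64_t txg)
{
	struct per_txg *s = &slots[txg & TXG_MASK];

	/* Stale slots (owned by another txg) don't count. */
	return (s->txg == txg && s->dirty);
}

/*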
*/ if (tx != NULL) { dmu_tx_commit(tx); txg_wait_synced(zilog->zl_dmu_pool, txg); } ASSERT(bcmp(&blk, &zh->zh_log, sizeof (blk)) == 0); return (lwb); } /* * In one tx, free all log blocks and clear the log header. * If keep_first is set, then we're replaying a log with no content. * We want to keep the first block, however, so that the first * synchronous transaction doesn't require a txg_wait_synced() * in zil_create(). We don't need to txg_wait_synced() here either * when keep_first is set, because both zil_create() and zil_destroy() * will wait for any in-progress destroys to complete. */ void zil_destroy(zilog_t *zilog, boolean_t keep_first) { const zil_header_t *zh = zilog->zl_header; lwb_t *lwb; dmu_tx_t *tx; uint64_t txg; /* * Wait for any previous destroy to complete. */ txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); zilog->zl_old_header = *zh; /* debugging aid */ if (BP_IS_HOLE(&zh->zh_log)) return; tx = dmu_tx_create(zilog->zl_os); VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); mutex_enter(&zilog->zl_lock); ASSERT3U(zilog->zl_destroy_txg, <, txg); zilog->zl_destroy_txg = txg; zilog->zl_keep_first = keep_first; if (!list_is_empty(&zilog->zl_lwb_list)) { ASSERT(zh->zh_claim_txg == 0); VERIFY(!keep_first); while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { list_remove(&zilog->zl_lwb_list, lwb); if (lwb->lwb_buf != NULL) zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); zio_free_zil(zilog->zl_spa, txg, &lwb->lwb_blk); kmem_cache_free(zil_lwb_cache, lwb); } } else if (!keep_first) { zil_destroy_sync(zilog, tx); } mutex_exit(&zilog->zl_lock); dmu_tx_commit(tx); } void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) { ASSERT(list_is_empty(&zilog->zl_lwb_list)); (void) zil_parse(zilog, zil_free_log_block, zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); } int zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) { dmu_tx_t *tx = txarg; uint64_t first_txg = dmu_tx_get_txg(tx); zilog_t *zilog; zil_header_t *zh; objset_t *os; int error; error = dmu_objset_own_obj(dp, ds->ds_object, DMU_OST_ANY, B_FALSE, FTAG, &os); if (error != 0) { /* * EBUSY indicates that the objset is inconsistent, in which * case it can not have a ZIL. */ if (error != EBUSY) { cmn_err(CE_WARN, "can't open objset for %llu, error %u", (unsigned long long)ds->ds_object, error); } return (0); } zilog = dmu_objset_zil(os); zh = zil_header_in_syncing_context(zilog); if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR) { if (!BP_IS_HOLE(&zh->zh_log)) zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); BP_ZERO(&zh->zh_log); dsl_dataset_dirty(dmu_objset_ds(os), tx); dmu_objset_disown(os, FTAG); return (0); } /* * Claim all log blocks if we haven't already done so, and remember * the highest claimed sequence number. This ensures that if we can * read only part of the log now (e.g. due to a missing device), * but we can read the entire log later, we will not try to replay * or destroy beyond the last block we successfully claimed. 
*/ ASSERT3U(zh->zh_claim_txg, <=, first_txg); if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { (void) zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, first_txg); zh->zh_claim_txg = first_txg; zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1) zh->zh_flags |= ZIL_REPLAY_NEEDED; zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID; dsl_dataset_dirty(dmu_objset_ds(os), tx); } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); dmu_objset_disown(os, FTAG); return (0); } /* * Check the log by walking the log chain. * Checksum errors are ok as they indicate the end of the chain. * Any other error (no device or read failure) returns an error. */ /* ARGSUSED */ int zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) { zilog_t *zilog; objset_t *os; blkptr_t *bp; int error; ASSERT(tx == NULL); error = dmu_objset_from_ds(ds, &os); if (error != 0) { cmn_err(CE_WARN, "can't open objset %llu, error %d", (unsigned long long)ds->ds_object, error); return (0); } zilog = dmu_objset_zil(os); bp = (blkptr_t *)&zilog->zl_header->zh_log; /* * Check the first block and determine if it's on a log device * which may have been removed or faulted prior to loading this * pool. If so, there's no point in checking the rest of the log * as its content should have already been synced to the pool. */ if (!BP_IS_HOLE(bp)) { vdev_t *vd; boolean_t valid = B_TRUE; spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER); vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0])); if (vd->vdev_islog && vdev_is_dead(vd)) valid = vdev_log_state_valid(vd); spa_config_exit(os->os_spa, SCL_STATE, FTAG); if (!valid) return (0); } /* * Because tx == NULL, zil_claim_log_block() will not actually claim * any blocks, but just determine whether it is possible to do so. * In addition to checking the log chain, zil_claim_log_block() * will invoke zio_claim() with a done func of spa_claim_notify(), * which will update spa_max_claim_txg. See spa_load() for details. */ error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa)); return ((error == ECKSUM || error == ENOENT) ? 0 : error); } static int zil_vdev_compare(const void *x1, const void *x2) { const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev; const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev; if (v1 < v2) return (-1); if (v1 > v2) return (1); return (0); } void zil_add_block(zilog_t *zilog, const blkptr_t *bp) { avl_tree_t *t = &zilog->zl_vdev_tree; avl_index_t where; zil_vdev_node_t *zv, zvsearch; int ndvas = BP_GET_NDVAS(bp); int i; if (zfs_nocacheflush) return; ASSERT(zilog->zl_writer); /* * Even though we're zl_writer, we still need a lock because the * zl_get_data() callbacks may have dmu_sync() done callbacks * that will run concurrently. */ mutex_enter(&zilog->zl_vdev_lock); for (i = 0; i < ndvas; i++) { zvsearch.zv_vdev = DVA_GET_VDEV(&bp->blk_dva[i]); if (avl_find(t, &zvsearch, &where) == NULL) { zv = kmem_alloc(sizeof (*zv), KM_SLEEP); zv->zv_vdev = zvsearch.zv_vdev; avl_insert(t, zv, where); } } mutex_exit(&zilog->zl_vdev_lock); } static void zil_flush_vdevs(zilog_t *zilog) { spa_t *spa = zilog->zl_spa; avl_tree_t *t = &zilog->zl_vdev_tree; void *cookie = NULL; zil_vdev_node_t *zv; zio_t *zio; ASSERT(zilog->zl_writer); /* * We don't need zl_vdev_lock here because we're the zl_writer, * and all zl_get_data() callbacks are done. 
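 */

/*
 * zil_add_block() and zil_flush_vdevs() above form a gather-then-flush
 * pair: every log write records the vdevs it touched in a deduplicating
 * tree, and commit time flushes each write cache exactly once. A standalone
 * sketch of the same pattern using a bitmap in place of the AVL tree.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_VDEVS	64

static uint64_t touched;	/* one bit per vdev id, cf. zl_vdev_tree */

/* Note a vdev a log block landed on; duplicates collapse for free. */
static void
note_vdev(unsigned vdev)
{
	if (vdev < MAX_VDEVS)
		touched |= UINT64_C(1) << vdev;
}

/* Flush every touched vdev once, then forget the set. */
static void
flush_vdevs(void)
{
	for (unsigned v = 0; v < MAX_VDEVS; v++) {
		if (touched & (UINT64_C(1) << v))
			printf("flush write cache of vdev %u\n", v);
	}
	touched = 0;
}

/*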
*/ if (avl_numnodes(t) == 0) return; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); if (vd != NULL) zio_flush(zio, vd); kmem_free(zv, sizeof (*zv)); } /* * Wait for all the flushes to complete. Not all devices actually * support the DKIOCFLUSHWRITECACHE ioctl, so it's OK if it fails. */ (void) zio_wait(zio); spa_config_exit(spa, SCL_STATE, FTAG); } /* * Function called when a log block write completes */ static void zil_lwb_write_done(zio_t *zio) { lwb_t *lwb = zio->io_private; zilog_t *zilog = lwb->lwb_zilog; dmu_tx_t *tx = lwb->lwb_tx; ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG); ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); ASSERT(BP_GET_BYTEORDER(zio->io_bp) == ZFS_HOST_BYTEORDER); ASSERT(!BP_IS_GANG(zio->io_bp)); ASSERT(!BP_IS_HOLE(zio->io_bp)); ASSERT(BP_GET_FILL(zio->io_bp) == 0); /* * Ensure the lwb buffer pointer is cleared before releasing * the txg. If we have had an allocation failure and * the txg is waiting to sync then we want zil_sync() * to remove the lwb so that it's not picked up as the next new * one in zil_commit_writer(). zil_sync() will only remove * the lwb if lwb_buf is null. */ zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); mutex_enter(&zilog->zl_lock); lwb->lwb_buf = NULL; lwb->lwb_tx = NULL; mutex_exit(&zilog->zl_lock); /* * Now that we've written this log block, we have a stable pointer * to the next block in the chain, so it's OK to let the txg in * which we allocated the next block sync. */ dmu_tx_commit(tx); } /* * Initialize the io for a log block. */ static void zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) { zbookmark_phys_t zb; zio_priority_t prio; SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); if (zilog->zl_root_zio == NULL) { zilog->zl_root_zio = zio_root(zilog->zl_spa, NULL, NULL, ZIO_FLAG_CANFAIL); } if (lwb->lwb_zio == NULL) { if (zilog->zl_cur_used <= zil_slog_limit || !lwb->lwb_slog) prio = ZIO_PRIORITY_SYNC_WRITE; else prio = ZIO_PRIORITY_ASYNC_WRITE; lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa, 0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE, &zb); } } /* * Define a limited set of intent log block sizes. * * These must be a multiple of 4KB. Note only the amount used (again * aligned to 4KB) actually gets written. However, we can't always just * allocate SPA_OLD_MAXBLOCKSIZE as the slog space could be exhausted. */ uint64_t zil_block_buckets[] = { 4096, /* non TX_WRITE */ 8192+4096, /* data base */ 32*1024 + 4096, /* NFS writes */ UINT64_MAX }; /* * Start a log block write and advance to the next log block. * Calls are serialized. */ static lwb_t * zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) { lwb_t *nlwb = NULL; zil_chain_t *zilc; spa_t *spa = zilog->zl_spa; blkptr_t *bp; dmu_tx_t *tx; uint64_t txg; uint64_t zil_blksz, wsz; int i, error; boolean_t slog; if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) { zilc = (zil_chain_t *)lwb->lwb_buf; bp = &zilc->zc_next_blk; } else { zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz); bp = &zilc->zc_next_blk; } ASSERT(lwb->lwb_nused <= lwb->lwb_sz); /* * Allocate the next block and save its address in this block * before writing it in order to establish the log chain.
* Note that if the allocation of nlwb synced before we wrote * the block that points at it (lwb), we'd leak it if we crashed. * Therefore, we don't do dmu_tx_commit() until zil_lwb_write_done(). * We dirty the dataset to ensure that zil_sync() will be called * to clean up in the event of allocation failure or I/O failure. */ tx = dmu_tx_create(zilog->zl_os); VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); lwb->lwb_tx = tx; /* * Log blocks are pre-allocated. Here we select the size of the next * block, based on size used in the last block. * - first find the smallest bucket that will fit the block from a * limited set of block sizes. This is because it's faster to write * blocks allocated from the same metaslab as they are adjacent or * close. * - next find the maximum from the new suggested size and an array of * previous sizes. This lessens a picket fence effect of wrongly * guessing the size if we have a stream of say 2k, 64k, 2k, 64k * requests. * * Note we only write what is used, but we can't just allocate * the maximum block size because we can exhaust the available * pool log space. */ zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t); for (i = 0; zil_blksz > zil_block_buckets[i]; i++) continue; zil_blksz = zil_block_buckets[i]; if (zil_blksz == UINT64_MAX) zil_blksz = SPA_OLD_MAXBLOCKSIZE; zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz; for (i = 0; i < ZIL_PREV_BLKS; i++) zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]); zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1); BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz, &slog); if (error == 0) { ASSERT3U(bp->blk_birth, ==, txg); bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; /* * Allocate a new log write buffer (lwb). */ nlwb = zil_alloc_lwb(zilog, bp, slog, txg); /* Record the block for later vdev flushing */ zil_add_block(zilog, &lwb->lwb_blk); } if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) { /* For Slim ZIL only write what is used. */ wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t); ASSERT3U(wsz, <=, lwb->lwb_sz); zio_shrink(lwb->lwb_zio, wsz); } else { wsz = lwb->lwb_sz; } zilc->zc_pad = 0; zilc->zc_nused = lwb->lwb_nused; zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum; /* * clear unused data for security */ bzero(lwb->lwb_buf + lwb->lwb_nused, wsz - lwb->lwb_nused); zio_nowait(lwb->lwb_zio); /* Kick off the write for the old log block */ /* * If there was an allocation failure then nlwb will be null which * forces a txg_wait_synced(). */ return (nlwb); } static lwb_t * zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) { lr_t *lrcb, *lrc = &itx->itx_lr; /* common log record */ lr_write_t *lrwb, *lrw = (lr_write_t *)lrc; char *lr_buf; uint64_t txg = lrc->lrc_txg; uint64_t reclen = lrc->lrc_reclen; uint64_t dlen = 0; uint64_t dnow, lwb_sp; if (lwb == NULL) return (NULL); ASSERT(lwb->lwb_buf != NULL); - ASSERT(zilog_is_dirty(zilog) || - spa_freeze_txg(zilog->zl_spa) != UINT64_MAX); if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) dlen = P2ROUNDUP_TYPED( lrw->lr_length, sizeof (uint64_t), uint64_t); zilog->zl_cur_used += (reclen + dlen); zil_lwb_write_init(zilog, lwb); cont: /* * If this record won't fit in the current log block, start a new one.
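 */

/*
 * Two heuristics above choose the next block size: first the smallest
 * bucket that fits the current demand, then a rolling maximum over the
 * last few choices so an alternating small/large workload doesn't whipsaw
 * the size (the "picket fence" the comment describes). A standalone sketch
 * with the same bucket table; PREV_BLKS stands in for ZIL_PREV_BLKS and
 * max_blksz for SPA_OLD_MAXBLOCKSIZE.
 */
#include <stdint.h>

#define PREV_BLKS	16	/* power of two */

static const uint64_t buckets[] = {
	4096, 8192 + 4096, 32 * 1024 + 4096, UINT64_MAX
};
static uint64_t prev[PREV_BLKS];
static unsigned rotor;

static uint64_t
next_blksz(uint64_t need, uint64_t max_blksz)
{
	uint64_t sz;
	int i;

	/* Smallest bucket that fits; the UINT64_MAX sentinel ends the scan. */
	for (i = 0; need > buckets[i]; i++)
		continue;
	sz = (buckets[i] == UINT64_MAX) ? max_blksz : buckets[i];

	/* Rolling max over recent picks damps the picket-fence effect. */
	prev[rotor] = sz;
	for (i = 0; i < PREV_BLKS; i++) {
		if (prev[i] > sz)
			sz = prev[i];
	}
	rotor = (rotor + 1) & (PREV_BLKS - 1);
	return (sz);
}

/*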
* For WR_NEED_COPY optimize layout for minimal number of chunks, but * try to keep wasted space within reasonable range (12%). */ lwb_sp = lwb->lwb_sz - lwb->lwb_nused; if (reclen > lwb_sp || (reclen + dlen > lwb_sp && lwb_sp < ZIL_MAX_LOG_DATA / 8 && (dlen % ZIL_MAX_LOG_DATA == 0 || lwb_sp < reclen + dlen % ZIL_MAX_LOG_DATA))) { lwb = zil_lwb_write_start(zilog, lwb); if (lwb == NULL) return (NULL); zil_lwb_write_init(zilog, lwb); ASSERT(LWB_EMPTY(lwb)); lwb_sp = lwb->lwb_sz - lwb->lwb_nused; ASSERT3U(reclen + MIN(dlen, sizeof(uint64_t)), <=, lwb_sp); } dnow = MIN(dlen, lwb_sp - reclen); lr_buf = lwb->lwb_buf + lwb->lwb_nused; bcopy(lrc, lr_buf, reclen); lrcb = (lr_t *)lr_buf; lrwb = (lr_write_t *)lrcb; /* * If it's a write, fetch the data or get its blkptr as appropriate. */ if (lrc->lrc_txtype == TX_WRITE) { if (txg > spa_freeze_txg(zilog->zl_spa)) txg_wait_synced(zilog->zl_dmu_pool, txg); if (itx->itx_wr_state != WR_COPIED) { char *dbuf; int error; if (itx->itx_wr_state == WR_NEED_COPY) { dbuf = lr_buf + reclen; lrcb->lrc_reclen += dnow; if (lrwb->lr_length > dnow) lrwb->lr_length = dnow; lrw->lr_offset += dnow; lrw->lr_length -= dnow; } else { ASSERT(itx->itx_wr_state == WR_INDIRECT); dbuf = NULL; } error = zilog->zl_get_data( itx->itx_private, lrwb, dbuf, lwb->lwb_zio); if (error == EIO) { txg_wait_synced(zilog->zl_dmu_pool, txg); return (lwb); } if (error != 0) { ASSERT(error == ENOENT || error == EEXIST || error == EALREADY); return (lwb); } } } /* * We're actually making an entry, so update lrc_seq to be the * log record sequence number. Note that this is generally not * equal to the itx sequence number because not all transactions * are synchronous, and sometimes spa_sync() gets there first. */ lrcb->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */ lwb->lwb_nused += reclen + dnow; lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg); ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz); ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t))); dlen -= dnow; if (dlen > 0) { zilog->zl_cur_used += reclen; goto cont; } return (lwb); } itx_t * zil_itx_create(uint64_t txtype, size_t lrsize) { itx_t *itx; lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t); itx = kmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP); itx->itx_lr.lrc_txtype = txtype; itx->itx_lr.lrc_reclen = lrsize; itx->itx_lr.lrc_seq = 0; /* defensive */ itx->itx_sync = B_TRUE; /* default is synchronous */ return (itx); } void zil_itx_destroy(itx_t *itx) { kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } /* * Free up the sync and async itxs. The itxs_t has already been detached * so no locks are needed.
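 */

/*
 * zil_lwb_commit() above rounds record and data lengths to 8-byte
 * multiples with P2ROUNDUP_TYPED, which is what keeps lwb_nused aligned
 * (the ASSERT0(P2PHASE(...)) check). For a power-of-two alignment the
 * round-up is a single mask operation; a standalone sketch.
 */
#include <assert.h>
#include <stdint.h>

/* Round x up to the next multiple of align; align must be a power of two. */
static uint64_t
p2roundup(uint64_t x, uint64_t align)
{
	return ((x + align - 1) & ~(align - 1));
}

int
main(void)
{
	assert(p2roundup(1, 8) == 8);
	assert(p2roundup(8, 8) == 8);			/* already aligned */
	assert(p2roundup(13, sizeof (uint64_t)) == 16);
	return (0);
}

/*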
*/ static void zil_itxg_clean(itxs_t *itxs) { itx_t *itx; list_t *list; avl_tree_t *t; void *cookie; itx_async_node_t *ian; list = &itxs->i_sync_list; while ((itx = list_head(list)) != NULL) { list_remove(list, itx); kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } cookie = NULL; t = &itxs->i_async_tree; while ((ian = avl_destroy_nodes(t, &cookie)) != NULL) { list = &ian->ia_list; while ((itx = list_head(list)) != NULL) { list_remove(list, itx); kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } list_destroy(list); kmem_free(ian, sizeof (itx_async_node_t)); } avl_destroy(t); kmem_free(itxs, sizeof (itxs_t)); } static int zil_aitx_compare(const void *x1, const void *x2) { const uint64_t o1 = ((itx_async_node_t *)x1)->ia_foid; const uint64_t o2 = ((itx_async_node_t *)x2)->ia_foid; if (o1 < o2) return (-1); if (o1 > o2) return (1); return (0); } /* * Remove all async itx with the given oid. */ static void zil_remove_async(zilog_t *zilog, uint64_t oid) { uint64_t otxg, txg; itx_async_node_t *ian; avl_tree_t *t; avl_index_t where; list_t clean_list; itx_t *itx; ASSERT(oid != 0); list_create(&clean_list, sizeof (itx_t), offsetof(itx_t, itx_node)); if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ otxg = ZILTEST_TXG; else otxg = spa_last_synced_txg(zilog->zl_spa) + 1; for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; mutex_enter(&itxg->itxg_lock); if (itxg->itxg_txg != txg) { mutex_exit(&itxg->itxg_lock); continue; } /* * Locate the object node and append its list. */ t = &itxg->itxg_itxs->i_async_tree; ian = avl_find(t, &oid, &where); if (ian != NULL) list_move_tail(&clean_list, &ian->ia_list); mutex_exit(&itxg->itxg_lock); } while ((itx = list_head(&clean_list)) != NULL) { list_remove(&clean_list, itx); kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } list_destroy(&clean_list); } void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) { uint64_t txg; itxg_t *itxg; itxs_t *itxs, *clean = NULL; /* * Object ids can be re-instantiated in the next txg so * remove any async transactions to avoid future leaks. * This can happen if a fsync occurs on the re-instantiated * object for a WR_INDIRECT or WR_NEED_COPY write, which gets * the new file data and flushes a write record for the old object. */ if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE) zil_remove_async(zilog, itx->itx_oid); /* * Ensure the data of a renamed file is committed before the rename. */ if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME) zil_async_to_sync(zilog, itx->itx_oid); if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) txg = ZILTEST_TXG; else txg = dmu_tx_get_txg(tx); itxg = &zilog->zl_itxg[txg & TXG_MASK]; mutex_enter(&itxg->itxg_lock); itxs = itxg->itxg_itxs; if (itxg->itxg_txg != txg) { if (itxs != NULL) { /* * The zil_clean callback hasn't got around to cleaning * this itxg. Save the itxs for release below. * This should be rare. 
*/ clean = itxg->itxg_itxs; } itxg->itxg_txg = txg; itxs = itxg->itxg_itxs = kmem_zalloc(sizeof (itxs_t), KM_SLEEP); list_create(&itxs->i_sync_list, sizeof (itx_t), offsetof(itx_t, itx_node)); avl_create(&itxs->i_async_tree, zil_aitx_compare, sizeof (itx_async_node_t), offsetof(itx_async_node_t, ia_node)); } if (itx->itx_sync) { list_insert_tail(&itxs->i_sync_list, itx); } else { avl_tree_t *t = &itxs->i_async_tree; uint64_t foid = ((lr_ooo_t *)&itx->itx_lr)->lr_foid; itx_async_node_t *ian; avl_index_t where; ian = avl_find(t, &foid, &where); if (ian == NULL) { ian = kmem_alloc(sizeof (itx_async_node_t), KM_SLEEP); list_create(&ian->ia_list, sizeof (itx_t), offsetof(itx_t, itx_node)); ian->ia_foid = foid; avl_insert(t, ian, where); } list_insert_tail(&ian->ia_list, itx); } itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx); zilog_dirty(zilog, txg); mutex_exit(&itxg->itxg_lock); /* Release the old itxs now that we've dropped the lock */ if (clean != NULL) zil_itxg_clean(clean); } /* * If there are any in-memory intent log transactions which have now been * synced then start up a taskq to free them. We should only do this after we * have written out the uberblocks (i.e. txg has been committed) so that * we don't inadvertently clean out in-memory log records that would be required * by zil_commit(). */ void zil_clean(zilog_t *zilog, uint64_t synced_txg) { itxg_t *itxg = &zilog->zl_itxg[synced_txg & TXG_MASK]; itxs_t *clean_me; mutex_enter(&itxg->itxg_lock); if (itxg->itxg_itxs == NULL || itxg->itxg_txg == ZILTEST_TXG) { mutex_exit(&itxg->itxg_lock); return; } ASSERT3U(itxg->itxg_txg, <=, synced_txg); ASSERT(itxg->itxg_txg != 0); ASSERT(zilog->zl_clean_taskq != NULL); clean_me = itxg->itxg_itxs; itxg->itxg_itxs = NULL; itxg->itxg_txg = 0; mutex_exit(&itxg->itxg_lock); /* * Preferably start a task queue to free up the old itxs but * if taskq_dispatch can't allocate resources to do that then * free it in-line. This should be rare. Note, using TQ_SLEEP * created a bad performance problem. */ if (taskq_dispatch(zilog->zl_clean_taskq, (void (*)(void *))zil_itxg_clean, clean_me, TQ_NOSLEEP) == 0) zil_itxg_clean(clean_me); } /* * Get the list of itxs to commit into zl_itx_commit_list. */ static void zil_get_commit_list(zilog_t *zilog) { uint64_t otxg, txg; list_t *commit_list = &zilog->zl_itx_commit_list; if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ otxg = ZILTEST_TXG; else otxg = spa_last_synced_txg(zilog->zl_spa) + 1; + /* + * This is inherently racy, since there is nothing to prevent + * the last synced txg from changing. That's okay since we'll + * only commit things in the future. + */ for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; mutex_enter(&itxg->itxg_lock); if (itxg->itxg_txg != txg) { mutex_exit(&itxg->itxg_lock); continue; } + /* + * If we're adding itx records to the zl_itx_commit_list, + * then the zil better be dirty in this "txg". We can assert + * that here since we're holding the itxg_lock which will + * prevent spa_sync from cleaning it. Once we add the itxs + * to the zl_itx_commit_list we must commit it to disk even + * if it's unnecessary (i.e. the txg was synced). + */ + ASSERT(zilog_is_dirty_in_txg(zilog, txg) || + spa_freeze_txg(zilog->zl_spa) != UINT64_MAX); list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list); mutex_exit(&itxg->itxg_lock); } } /* * Move the async itxs for a specified object to commit into sync lists.
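 */

/*
 * zil_clean() above prefers handing the freed itxs to a taskq but falls
 * back to freeing them inline when a non-blocking dispatch can't get
 * resources; the work happens either way, just not always off-thread. A
 * standalone sketch of that shape; try_dispatch() is a stand-in that,
 * like taskq_dispatch(..., TQ_NOSLEEP), returns 0 on failure.
 */
typedef void (*task_fn)(void *);

static int
try_dispatch(task_fn fn, void *arg)
{
	(void)fn;
	(void)arg;
	return (0);	/* pretend the queue is out of resources */
}

static void
dispatch_or_inline(task_fn fn, void *arg)
{
	/* On dispatch failure, run the work here instead of sleeping. */
	if (try_dispatch(fn, arg) == 0)
		fn(arg);
}

/*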
*/ void zil_async_to_sync(zilog_t *zilog, uint64_t foid) { uint64_t otxg, txg; itx_async_node_t *ian; avl_tree_t *t; avl_index_t where; if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ otxg = ZILTEST_TXG; else otxg = spa_last_synced_txg(zilog->zl_spa) + 1; + /* + * This is inherently racy, since there is nothing to prevent + * the last synced txg from changing. + */ for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; mutex_enter(&itxg->itxg_lock); if (itxg->itxg_txg != txg) { mutex_exit(&itxg->itxg_lock); continue; } /* * If a foid is specified then find that node and append its * list. Otherwise walk the tree appending all the lists * to the sync list. We add to the end rather than the * beginning to ensure the create has happened. */ t = &itxg->itxg_itxs->i_async_tree; if (foid != 0) { ian = avl_find(t, &foid, &where); if (ian != NULL) { list_move_tail(&itxg->itxg_itxs->i_sync_list, &ian->ia_list); } } else { void *cookie = NULL; while ((ian = avl_destroy_nodes(t, &cookie)) != NULL) { list_move_tail(&itxg->itxg_itxs->i_sync_list, &ian->ia_list); list_destroy(&ian->ia_list); kmem_free(ian, sizeof (itx_async_node_t)); } } mutex_exit(&itxg->itxg_lock); } } static void zil_commit_writer(zilog_t *zilog) { uint64_t txg; itx_t *itx; lwb_t *lwb; spa_t *spa = zilog->zl_spa; int error = 0; ASSERT(zilog->zl_root_zio == NULL); mutex_exit(&zilog->zl_lock); zil_get_commit_list(zilog); /* * Return if there's nothing to commit before we dirty the fs by * calling zil_create(). */ if (list_head(&zilog->zl_itx_commit_list) == NULL) { mutex_enter(&zilog->zl_lock); return; } if (zilog->zl_suspend) { lwb = NULL; } else { lwb = list_tail(&zilog->zl_lwb_list); if (lwb == NULL) lwb = zil_create(zilog); } DTRACE_PROBE1(zil__cw1, zilog_t *, zilog); while (itx = list_head(&zilog->zl_itx_commit_list)) { txg = itx->itx_lr.lrc_txg; - ASSERT(txg); + ASSERT3U(txg, !=, 0); + /* + * This is inherently racy and may result in us writing + * out a log block for a txg that was just synced. This is + * ok since we'll end up cleaning that log block the next + * time we call zil_sync(). + */ if (txg > spa_last_synced_txg(spa) || txg > spa_freeze_txg(spa)) lwb = zil_lwb_commit(zilog, itx, lwb); list_remove(&zilog->zl_itx_commit_list, itx); kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } DTRACE_PROBE1(zil__cw2, zilog_t *, zilog); /* write the last block out */ if (lwb != NULL && lwb->lwb_zio != NULL) lwb = zil_lwb_write_start(zilog, lwb); zilog->zl_cur_used = 0; /* * Wait if necessary for the log blocks to be on stable storage. */ if (zilog->zl_root_zio) { error = zio_wait(zilog->zl_root_zio); zilog->zl_root_zio = NULL; zil_flush_vdevs(zilog); } if (error || lwb == NULL) txg_wait_synced(zilog->zl_dmu_pool, 0); mutex_enter(&zilog->zl_lock); /* * Remember the highest committed log sequence number for ztest. * We only update this value when all the log writes succeeded, * because ztest wants to ASSERT that it got the whole log chain. */ if (error == 0 && lwb != NULL) zilog->zl_commit_lr_seq = zilog->zl_lr_seq; } /* * Commit zfs transactions to stable storage. * If foid is 0 push out all transactions, otherwise push only those * for that object or that might reference that object. * * itxs are committed in batches. In a heavily stressed zil there will be * a commit writer thread who is writing out a bunch of itxs to the log * for a set of committing threads (cthreads) in the same batch as the writer.
* Those cthreads are all waiting on the same cv for that batch. * * There will also be a different and growing batch of threads that are * waiting to commit (qthreads). When the committing batch completes * a transition occurs such that the cthreads exit and the qthreads become * cthreads. One of the new cthreads becomes the writer thread for the * batch. Any new threads arriving become new qthreads. * * Only 2 condition variables are needed and there's no transition * between the two cvs. They just flip-flop between qthreads * and cthreads. * * Using this scheme we can efficiently wake up only those threads * whose batch has been committed. */ void zil_commit(zilog_t *zilog, uint64_t foid) { uint64_t mybatch; if (zilog->zl_sync == ZFS_SYNC_DISABLED) return; /* move the async itxs for the foid to the sync queues */ zil_async_to_sync(zilog, foid); mutex_enter(&zilog->zl_lock); mybatch = zilog->zl_next_batch; while (zilog->zl_writer) { cv_wait(&zilog->zl_cv_batch[mybatch & 1], &zilog->zl_lock); if (mybatch <= zilog->zl_com_batch) { mutex_exit(&zilog->zl_lock); return; } } zilog->zl_next_batch++; zilog->zl_writer = B_TRUE; zil_commit_writer(zilog); zilog->zl_com_batch = mybatch; zilog->zl_writer = B_FALSE; mutex_exit(&zilog->zl_lock); /* wake up one thread to become the next writer */ cv_signal(&zilog->zl_cv_batch[(mybatch+1) & 1]); /* wake up all threads waiting for this batch to be committed */ cv_broadcast(&zilog->zl_cv_batch[mybatch & 1]); } /* * Called in syncing context to free committed log blocks and update log header. */ void zil_sync(zilog_t *zilog, dmu_tx_t *tx) { zil_header_t *zh = zil_header_in_syncing_context(zilog); uint64_t txg = dmu_tx_get_txg(tx); spa_t *spa = zilog->zl_spa; uint64_t *replayed_seq = &zilog->zl_replayed_seq[txg & TXG_MASK]; lwb_t *lwb; /* * We don't zero out zl_destroy_txg, so make sure we don't try * to destroy it twice. */ if (spa_sync_pass(spa) != 1) return; mutex_enter(&zilog->zl_lock); ASSERT(zilog->zl_stop_sync == 0); if (*replayed_seq != 0) { ASSERT(zh->zh_replay_seq < *replayed_seq); zh->zh_replay_seq = *replayed_seq; *replayed_seq = 0; } if (zilog->zl_destroy_txg == txg) { blkptr_t blk = zh->zh_log; ASSERT(list_head(&zilog->zl_lwb_list) == NULL); bzero(zh, sizeof (zil_header_t)); bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq)); if (zilog->zl_keep_first) { /* * If this block was part of a log chain that couldn't * be claimed because a device was missing during * zil_claim(), but that device later returns, * then this block could erroneously appear valid. * To guard against this, assign a new GUID to the new * log chain so it doesn't matter what blk points to. */ zil_init_log_chain(zilog, &blk); zh->zh_log = blk; } } while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { zh->zh_log = lwb->lwb_blk; if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg) break; list_remove(&zilog->zl_lwb_list, lwb); zio_free_zil(spa, txg, &lwb->lwb_blk); kmem_cache_free(zil_lwb_cache, lwb); /* * If we don't have anything left in the lwb list then * we've had an allocation failure and we need to zero * out the zil_header blkptr so that we don't end * up freeing the same block twice.
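 */

/*
 * A condensed pthreads sketch of the batch scheme zil_commit() above
 * implements: arriving threads wait on the condition variable matching
 * their batch's parity, one waiter is promoted to writer for the next
 * batch, and the whole finished batch is broadcast awake. Names are
 * stand-ins; in ZFS the writer drops zl_lock inside zil_commit_writer()
 * while the log I/O is in flight.
 */
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t zl_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t batch_cv[2] = {
	PTHREAD_COND_INITIALIZER, PTHREAD_COND_INITIALIZER
};
static uint64_t next_batch = 1, com_batch;
static int writer;

static void
write_out_batch(void)
{
	/* cf. zil_commit_writer(): gather itxs, issue and wait for I/O */
}

static void
commit(void)
{
	pthread_mutex_lock(&zl_lock);
	uint64_t mybatch = next_batch;

	while (writer) {
		/* Sleep on the cv for this batch's parity. */
		pthread_cond_wait(&batch_cv[mybatch & 1], &zl_lock);
		if (mybatch <= com_batch) {	/* our batch already hit disk */
			pthread_mutex_unlock(&zl_lock);
			return;
		}
	}
	next_batch++;
	writer = 1;
	write_out_batch();
	com_batch = mybatch;
	writer = 0;
	pthread_mutex_unlock(&zl_lock);
	/* Promote one waiter of the following batch to be its writer... */
	pthread_cond_signal(&batch_cv[(mybatch + 1) & 1]);
	/* ...and release everyone who rode in this batch. */
	pthread_cond_broadcast(&batch_cv[mybatch & 1]);
}

/*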
*/ if (list_head(&zilog->zl_lwb_list) == NULL) BP_ZERO(&zh->zh_log); } mutex_exit(&zilog->zl_lock); } void zil_init(void) { zil_lwb_cache = kmem_cache_create("zil_lwb_cache", sizeof (struct lwb), 0, NULL, NULL, NULL, NULL, NULL, 0); } void zil_fini(void) { kmem_cache_destroy(zil_lwb_cache); } void zil_set_sync(zilog_t *zilog, uint64_t sync) { zilog->zl_sync = sync; } void zil_set_logbias(zilog_t *zilog, uint64_t logbias) { zilog->zl_logbias = logbias; } zilog_t * zil_alloc(objset_t *os, zil_header_t *zh_phys) { zilog_t *zilog; zilog = kmem_zalloc(sizeof (zilog_t), KM_SLEEP); zilog->zl_header = zh_phys; zilog->zl_os = os; zilog->zl_spa = dmu_objset_spa(os); zilog->zl_dmu_pool = dmu_objset_pool(os); zilog->zl_destroy_txg = TXG_INITIAL - 1; zilog->zl_logbias = dmu_objset_logbias(os); zilog->zl_sync = dmu_objset_syncprop(os); zilog->zl_next_batch = 1; mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); for (int i = 0; i < TXG_SIZE; i++) { mutex_init(&zilog->zl_itxg[i].itxg_lock, NULL, MUTEX_DEFAULT, NULL); } list_create(&zilog->zl_lwb_list, sizeof (lwb_t), offsetof(lwb_t, lwb_node)); list_create(&zilog->zl_itx_commit_list, sizeof (itx_t), offsetof(itx_t, itx_node)); mutex_init(&zilog->zl_vdev_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&zilog->zl_vdev_tree, zil_vdev_compare, sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node)); cv_init(&zilog->zl_cv_writer, NULL, CV_DEFAULT, NULL); cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL); cv_init(&zilog->zl_cv_batch[0], NULL, CV_DEFAULT, NULL); cv_init(&zilog->zl_cv_batch[1], NULL, CV_DEFAULT, NULL); return (zilog); } void zil_free(zilog_t *zilog) { zilog->zl_stop_sync = 1; ASSERT0(zilog->zl_suspend); ASSERT0(zilog->zl_suspending); ASSERT(list_is_empty(&zilog->zl_lwb_list)); list_destroy(&zilog->zl_lwb_list); avl_destroy(&zilog->zl_vdev_tree); mutex_destroy(&zilog->zl_vdev_lock); ASSERT(list_is_empty(&zilog->zl_itx_commit_list)); list_destroy(&zilog->zl_itx_commit_list); for (int i = 0; i < TXG_SIZE; i++) { /* * It's possible for an itx to be generated that doesn't dirty * a txg (e.g. ztest TX_TRUNCATE). So there's no zil_clean() * callback to remove the entry. We remove those here. * * Also free up the ziltest itxs. */ if (zilog->zl_itxg[i].itxg_itxs) zil_itxg_clean(zilog->zl_itxg[i].itxg_itxs); mutex_destroy(&zilog->zl_itxg[i].itxg_lock); } mutex_destroy(&zilog->zl_lock); cv_destroy(&zilog->zl_cv_writer); cv_destroy(&zilog->zl_cv_suspend); cv_destroy(&zilog->zl_cv_batch[0]); cv_destroy(&zilog->zl_cv_batch[1]); kmem_free(zilog, sizeof (zilog_t)); } /* * Open an intent log. */ zilog_t * zil_open(objset_t *os, zil_get_data_t *get_data) { zilog_t *zilog = dmu_objset_zil(os); ASSERT(zilog->zl_clean_taskq == NULL); ASSERT(zilog->zl_get_data == NULL); ASSERT(list_is_empty(&zilog->zl_lwb_list)); zilog->zl_get_data = get_data; zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri, 2, 2, TASKQ_PREPOPULATE); return (zilog); } /* * Close an intent log. */ void zil_close(zilog_t *zilog) { lwb_t *lwb; uint64_t txg = 0; zil_commit(zilog, 0); /* commit all itx */ /* * The lwb_max_txg for the stubby lwb will reflect the last activity * for the zil. After a txg_wait_synced() on the txg we know all the * callbacks have occurred that may clean the zil. Only then can we * destroy the zl_clean_taskq. 
 */
	mutex_enter(&zilog->zl_lock);
	lwb = list_tail(&zilog->zl_lwb_list);
	if (lwb != NULL)
		txg = lwb->lwb_max_txg;
	mutex_exit(&zilog->zl_lock);
	if (txg)
		txg_wait_synced(zilog->zl_dmu_pool, txg);
-	ASSERT(!zilog_is_dirty(zilog));
+
+	if (zilog_is_dirty(zilog))
+		zfs_dbgmsg("zil (%p) is dirty, txg %llu", zilog, txg);
+	VERIFY(!zilog_is_dirty(zilog));

	taskq_destroy(zilog->zl_clean_taskq);
	zilog->zl_clean_taskq = NULL;
	zilog->zl_get_data = NULL;

	/*
	 * We should have only one LWB left on the list; remove it now.
	 */
	mutex_enter(&zilog->zl_lock);
	lwb = list_head(&zilog->zl_lwb_list);
	if (lwb != NULL) {
		ASSERT(lwb == list_tail(&zilog->zl_lwb_list));
		list_remove(&zilog->zl_lwb_list, lwb);
		zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
		kmem_cache_free(zil_lwb_cache, lwb);
	}
	mutex_exit(&zilog->zl_lock);
}

static char *suspend_tag = "zil suspending";

/*
 * Suspend an intent log.  While in suspended mode, we still honor
 * synchronous semantics, but we rely on txg_wait_synced() to do it.
 * On old version pools, we suspend the log briefly when taking a
 * snapshot so that it will have an empty intent log.
 *
 * Long holds are not really intended to be used the way we do here --
 * held for such a short time.  A concurrent caller of dsl_dataset_long_held()
 * could fail.  Therefore we take pains to only put a long hold if it is
 * actually necessary.  Fortunately, it will only be necessary if the
 * objset is currently mounted (or the ZVOL equivalent).  In that case it
 * will already have a long hold, so we are not really making things any worse.
 *
 * Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or
 * zvol_state_t), and use their mechanism to prevent their hold from being
 * dropped (e.g. VFS_HOLD()).  However, that would be even more pain for
 * very little gain.
 *
 * if cookiep == NULL, this does both the suspend & resume.
 * Otherwise, it returns with the dataset "long held", and the cookie
 * should be passed into zil_resume().
 */
int
zil_suspend(const char *osname, void **cookiep)
{
	objset_t *os;
	zilog_t *zilog;
	const zil_header_t *zh;
	int error;

	error = dmu_objset_hold(osname, suspend_tag, &os);
	if (error != 0)
		return (error);
	zilog = dmu_objset_zil(os);

	mutex_enter(&zilog->zl_lock);
	zh = zilog->zl_header;

	if (zh->zh_flags & ZIL_REPLAY_NEEDED) {		/* unplayed log */
		mutex_exit(&zilog->zl_lock);
		dmu_objset_rele(os, suspend_tag);
		return (SET_ERROR(EBUSY));
	}

	/*
	 * Don't put a long hold in the cases where we can avoid it.  This
	 * is when there is no cookie so we are doing a suspend & resume
	 * (i.e. called from zil_vdev_offline()), and there's nothing to do
	 * for the suspend because it's already suspended, or there's no ZIL.
	 */
	if (cookiep == NULL && !zilog->zl_suspending &&
	    (zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
		mutex_exit(&zilog->zl_lock);
		dmu_objset_rele(os, suspend_tag);
		return (0);
	}

	dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag);
	dsl_pool_rele(dmu_objset_pool(os), suspend_tag);

	zilog->zl_suspend++;

	if (zilog->zl_suspend > 1) {
		/*
		 * Someone else is already suspending it.
		 * Just wait for them to finish.
		 */
		while (zilog->zl_suspending)
			cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock);
		mutex_exit(&zilog->zl_lock);

		if (cookiep == NULL)
			zil_resume(os);
		else
			*cookiep = os;
		return (0);
	}

	/*
	 * If there is no pointer to an on-disk block, this ZIL must not
	 * be active (e.g. filesystem not mounted), so there's nothing
	 * to clean up.
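 *
 * (Aside: the dbgmsg-before-VERIFY pattern introduced in zil_close() above
 * generalizes.  A minimal sketch of a hypothetical wrapper -- note that it
 * re-evaluates the condition, which is fine for idempotent predicates:
 *
 *	#define	VERIFY_DBG(cond, fmt, ...) do {			\
 *		if (!(cond))					\
 *			zfs_dbgmsg(fmt, __VA_ARGS__);		\
 *		VERIFY(cond);					\
 *	} while (0)
 *
 * Usage: VERIFY_DBG(!zilog_is_dirty(zilog), "zil (%p) is dirty, txg %llu",
 * zilog, txg);)
 *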
*/ if (BP_IS_HOLE(&zh->zh_log)) { ASSERT(cookiep != NULL); /* fast path already handled */ *cookiep = os; mutex_exit(&zilog->zl_lock); return (0); } zilog->zl_suspending = B_TRUE; mutex_exit(&zilog->zl_lock); zil_commit(zilog, 0); zil_destroy(zilog, B_FALSE); mutex_enter(&zilog->zl_lock); zilog->zl_suspending = B_FALSE; cv_broadcast(&zilog->zl_cv_suspend); mutex_exit(&zilog->zl_lock); if (cookiep == NULL) zil_resume(os); else *cookiep = os; return (0); } void zil_resume(void *cookie) { objset_t *os = cookie; zilog_t *zilog = dmu_objset_zil(os); mutex_enter(&zilog->zl_lock); ASSERT(zilog->zl_suspend != 0); zilog->zl_suspend--; mutex_exit(&zilog->zl_lock); dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag); dsl_dataset_rele(dmu_objset_ds(os), suspend_tag); } typedef struct zil_replay_arg { zil_replay_func_t **zr_replay; void *zr_arg; boolean_t zr_byteswap; char *zr_lr; } zil_replay_arg_t; static int zil_replay_error(zilog_t *zilog, lr_t *lr, int error) { char name[ZFS_MAX_DATASET_NAME_LEN]; zilog->zl_replaying_seq--; /* didn't actually replay this one */ dmu_objset_name(zilog->zl_os, name); cmn_err(CE_WARN, "ZFS replay transaction error %d, " "dataset %s, seq 0x%llx, txtype %llu %s\n", error, name, (u_longlong_t)lr->lrc_seq, (u_longlong_t)(lr->lrc_txtype & ~TX_CI), (lr->lrc_txtype & TX_CI) ? "CI" : ""); return (error); } static int zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) { zil_replay_arg_t *zr = zra; const zil_header_t *zh = zilog->zl_header; uint64_t reclen = lr->lrc_reclen; uint64_t txtype = lr->lrc_txtype; int error = 0; zilog->zl_replaying_seq = lr->lrc_seq; if (lr->lrc_seq <= zh->zh_replay_seq) /* already replayed */ return (0); if (lr->lrc_txg < claim_txg) /* already committed */ return (0); /* Strip case-insensitive bit, still present in log record */ txtype &= ~TX_CI; if (txtype == 0 || txtype >= TX_MAX_TYPE) return (zil_replay_error(zilog, lr, EINVAL)); /* * If this record type can be logged out of order, the object * (lr_foid) may no longer exist. That's legitimate, not an error. */ if (TX_OOO(txtype)) { error = dmu_object_info(zilog->zl_os, ((lr_ooo_t *)lr)->lr_foid, NULL); if (error == ENOENT || error == EEXIST) return (0); } /* * Make a copy of the data so we can revise and extend it. */ bcopy(lr, zr->zr_lr, reclen); /* * If this is a TX_WRITE with a blkptr, suck in the data. */ if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) { error = zil_read_log_data(zilog, (lr_write_t *)lr, zr->zr_lr + reclen); if (error != 0) return (zil_replay_error(zilog, lr, error)); } /* * The log block containing this lr may have been byteswapped * so that we can easily examine common fields like lrc_txtype. * However, the log is a mix of different record types, and only the * replay vectors know how to byteswap their records. Therefore, if * the lr was byteswapped, undo it before invoking the replay vector. */ if (zr->zr_byteswap) byteswap_uint64_array(zr->zr_lr, reclen); /* * We must now do two things atomically: replay this log record, * and update the log header sequence number to reflect the fact that * we did so. At the end of each replay function the sequence number * is updated if we are in replay mode. */ error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap); if (error != 0) { /* * The DMU's dnode layer doesn't see removes until the txg * commits, so a subsequent claim can spuriously fail with * EEXIST. So if we receive any error we try syncing out * any removes then retry the transaction. 
Note that we * specify B_FALSE for byteswap now, so we don't do it twice. */ txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0); error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE); if (error != 0) return (zil_replay_error(zilog, lr, error)); } return (0); } /* ARGSUSED */ static int zil_incr_blks(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) { zilog->zl_replay_blks++; return (0); } /* * If this dataset has a non-empty intent log, replay it and destroy it. */ void zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) { zilog_t *zilog = dmu_objset_zil(os); const zil_header_t *zh = zilog->zl_header; zil_replay_arg_t zr; if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) { zil_destroy(zilog, B_TRUE); return; } zr.zr_replay = replay_func; zr.zr_arg = arg; zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); zr.zr_lr = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); /* * Wait for in-progress removes to sync before starting replay. */ txg_wait_synced(zilog->zl_dmu_pool, 0); zilog->zl_replay = B_TRUE; zilog->zl_replay_time = ddi_get_lbolt(); ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, zh->zh_claim_txg); kmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); zil_destroy(zilog, B_FALSE); txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); zilog->zl_replay = B_FALSE; } boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx) { if (zilog->zl_sync == ZFS_SYNC_DISABLED) return (B_TRUE); if (zilog->zl_replay) { dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = zilog->zl_replaying_seq; return (B_TRUE); } return (B_FALSE); } /* ARGSUSED */ int zil_vdev_offline(const char *osname, void *arg) { int error; error = zil_suspend(osname, NULL); if (error != 0) return (SET_ERROR(EEXIST)); return (0); } Index: projects/clang391-import/sys/cddl/contrib/opensolaris =================================================================== --- projects/clang391-import/sys/cddl/contrib/opensolaris (revision 309262) +++ projects/clang391-import/sys/cddl/contrib/opensolaris (revision 309263) Property changes on: projects/clang391-import/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,2 ## Merged /head/sys/cddl/contrib/opensolaris:r309166-309262 Merged /vendor-sys/illumos/dist:r309249 Index: projects/clang391-import/sys/dev/ahci/ahci.c =================================================================== --- projects/clang391-import/sys/dev/ahci/ahci.c (revision 309262) +++ projects/clang391-import/sys/dev/ahci/ahci.c (revision 309263) @@ -1,2737 +1,2733 @@ /*- * Copyright (c) 2009-2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ahci.h" #include #include #include #include #include /* local prototypes */ static void ahci_intr(void *data); static void ahci_intr_one(void *data); static void ahci_intr_one_edge(void *data); static int ahci_ch_init(device_t dev); static int ahci_ch_deinit(device_t dev); static int ahci_ch_suspend(device_t dev); static int ahci_ch_resume(device_t dev); static void ahci_ch_pm(void *arg); static void ahci_ch_intr(void *arg); static void ahci_ch_intr_direct(void *arg); static void ahci_ch_intr_main(struct ahci_channel *ch, uint32_t istatus); static void ahci_begin_transaction(struct ahci_channel *ch, union ccb *ccb); static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void ahci_execute_transaction(struct ahci_slot *slot); static void ahci_timeout(struct ahci_slot *slot); static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et); static int ahci_setup_fis(struct ahci_channel *ch, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag); static void ahci_dmainit(device_t dev); static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static void ahci_dmafini(device_t dev); static void ahci_slotsalloc(device_t dev); static void ahci_slotsfree(device_t dev); static void ahci_reset(struct ahci_channel *ch); static void ahci_start(struct ahci_channel *ch, int fbs); static void ahci_stop(struct ahci_channel *ch); static void ahci_clo(struct ahci_channel *ch); static void ahci_start_fr(struct ahci_channel *ch); static void ahci_stop_fr(struct ahci_channel *ch); static int ahci_sata_connect(struct ahci_channel *ch); static int ahci_sata_phy_reset(struct ahci_channel *ch); static int ahci_wait_ready(struct ahci_channel *ch, int t, int t0); static void ahci_issue_recovery(struct ahci_channel *ch); static void ahci_process_read_log(struct ahci_channel *ch, union ccb *ccb); static void ahci_process_request_sense(struct ahci_channel *ch, union ccb *ccb); static void ahciaction(struct cam_sim *sim, union ccb *ccb); static void ahcipoll(struct cam_sim *sim); static MALLOC_DEFINE(M_AHCI, "AHCI driver", "AHCI driver data buffers"); #define recovery_type spriv_field0 #define RECOVERY_NONE 0 #define RECOVERY_READ_LOG 1 #define RECOVERY_REQUEST_SENSE 2 #define recovery_slot spriv_field1 int ahci_ctlr_setup(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); /* Clear interrupts */ ATA_OUTL(ctlr->r_mem, AHCI_IS, ATA_INL(ctlr->r_mem, AHCI_IS)); /* Configure CCC */ if (ctlr->ccc) { ATA_OUTL(ctlr->r_mem, AHCI_CCCP, ATA_INL(ctlr->r_mem, AHCI_PI)); ATA_OUTL(ctlr->r_mem, AHCI_CCCC, (ctlr->ccc << AHCI_CCCC_TV_SHIFT) | (4 << AHCI_CCCC_CC_SHIFT) | AHCI_CCCC_EN); ctlr->cccv = (ATA_INL(ctlr->r_mem, AHCI_CCCC) & AHCI_CCCC_INT_MASK) >> AHCI_CCCC_INT_SHIFT; if (bootverbose) { device_printf(dev, "CCC with %dms/4cmd enabled on vector %d\n", ctlr->ccc, ctlr->cccv); 
} } /* Enable AHCI interrupts */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, ATA_INL(ctlr->r_mem, AHCI_GHC) | AHCI_GHC_IE); return (0); } int ahci_ctlr_reset(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int timeout; /* Enable AHCI mode */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE); /* Reset AHCI controller */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE|AHCI_GHC_HR); for (timeout = 1000; timeout > 0; timeout--) { DELAY(1000); if ((ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_HR) == 0) break; } if (timeout == 0) { device_printf(dev, "AHCI controller reset failure\n"); return (ENXIO); } /* Reenable AHCI mode */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE); if (ctlr->quirks & AHCI_Q_RESTORE_CAP) { /* * Restore capability field. * This is write to a read-only register to restore its state. * On fully standard-compliant hardware this is not needed and * this operation shall not take place. See ahci_pci.c for * platforms using this quirk. */ ATA_OUTL(ctlr->r_mem, AHCI_CAP, ctlr->caps); } return (0); } int ahci_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int error, i, speed, unit; uint32_t u, version; device_t child; ctlr->dev = dev; ctlr->ccc = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "ccc", &ctlr->ccc); /* Setup our own memory management for channels. */ ctlr->sc_iomem.rm_start = rman_get_start(ctlr->r_mem); ctlr->sc_iomem.rm_end = rman_get_end(ctlr->r_mem); ctlr->sc_iomem.rm_type = RMAN_ARRAY; ctlr->sc_iomem.rm_descr = "I/O memory addresses"; if ((error = rman_init(&ctlr->sc_iomem)) != 0) { ahci_free_mem(dev); return (error); } if ((error = rman_manage_region(&ctlr->sc_iomem, rman_get_start(ctlr->r_mem), rman_get_end(ctlr->r_mem))) != 0) { ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (error); } /* Get the HW capabilities */ version = ATA_INL(ctlr->r_mem, AHCI_VS); ctlr->caps = ATA_INL(ctlr->r_mem, AHCI_CAP); if (version >= 0x00010200) ctlr->caps2 = ATA_INL(ctlr->r_mem, AHCI_CAP2); if (ctlr->caps & AHCI_CAP_EMS) ctlr->capsem = ATA_INL(ctlr->r_mem, AHCI_EM_CTL); if (ctlr->quirks & AHCI_Q_FORCE_PI) { /* * Enable ports. * The spec says that BIOS sets up bits corresponding to * available ports. On platforms where this information * is missing, the driver can define available ports on its own. */ int nports = (ctlr->caps & AHCI_CAP_NPMASK) + 1; int nmask = (1 << nports) - 1; ATA_OUTL(ctlr->r_mem, AHCI_PI, nmask); device_printf(dev, "Forcing PI to %d ports (mask = %x)\n", nports, nmask); } ctlr->ichannels = ATA_INL(ctlr->r_mem, AHCI_PI); /* Identify and set separate quirks for HBA and RAID f/w Marvells. */ if ((ctlr->quirks & AHCI_Q_ALTSIG) && (ctlr->caps & AHCI_CAP_SPM) == 0) ctlr->quirks |= AHCI_Q_NOBSYRES; if (ctlr->quirks & AHCI_Q_1CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->ichannels &= 0x01; } if (ctlr->quirks & AHCI_Q_2CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->caps |= 1; ctlr->ichannels &= 0x03; } if (ctlr->quirks & AHCI_Q_4CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->caps |= 3; ctlr->ichannels &= 0x0f; } ctlr->channels = MAX(flsl(ctlr->ichannels), (ctlr->caps & AHCI_CAP_NPMASK) + 1); if (ctlr->quirks & AHCI_Q_NOPMP) ctlr->caps &= ~AHCI_CAP_SPM; if (ctlr->quirks & AHCI_Q_NONCQ) ctlr->caps &= ~AHCI_CAP_SNCQ; if ((ctlr->caps & AHCI_CAP_CCCS) == 0) ctlr->ccc = 0; ctlr->emloc = ATA_INL(ctlr->r_mem, AHCI_EM_LOC); /* Create controller-wide DMA tag. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, (ctlr->caps & AHCI_CAP_64BIT) ? 
BUS_SPACE_MAXADDR : BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, 0, NULL, NULL, &ctlr->dma_tag)) { ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (ENXIO); } ahci_ctlr_setup(dev); /* Setup interrupts. */ if ((error = ahci_setup_interrupt(dev)) != 0) { bus_dma_tag_destroy(ctlr->dma_tag); ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (error); } i = 0; for (u = ctlr->ichannels; u != 0; u >>= 1) i += (u & 1); ctlr->direct = (ctlr->msi && (ctlr->numirqs > 1 || i <= 3)); resource_int_value(device_get_name(dev), device_get_unit(dev), "direct", &ctlr->direct); /* Announce HW capabilities. */ speed = (ctlr->caps & AHCI_CAP_ISS) >> AHCI_CAP_ISS_SHIFT; device_printf(dev, "AHCI v%x.%02x with %d %sGbps ports, Port Multiplier %s%s\n", ((version >> 20) & 0xf0) + ((version >> 16) & 0x0f), ((version >> 4) & 0xf0) + (version & 0x0f), (ctlr->caps & AHCI_CAP_NPMASK) + 1, ((speed == 1) ? "1.5":((speed == 2) ? "3": ((speed == 3) ? "6":"?"))), (ctlr->caps & AHCI_CAP_SPM) ? "supported" : "not supported", (ctlr->caps & AHCI_CAP_FBSS) ? " with FBS" : ""); if (ctlr->quirks != 0) { device_printf(dev, "quirks=0x%b\n", ctlr->quirks, AHCI_Q_BIT_STRING); } if (bootverbose) { device_printf(dev, "Caps:%s%s%s%s%s%s%s%s %sGbps", (ctlr->caps & AHCI_CAP_64BIT) ? " 64bit":"", (ctlr->caps & AHCI_CAP_SNCQ) ? " NCQ":"", (ctlr->caps & AHCI_CAP_SSNTF) ? " SNTF":"", (ctlr->caps & AHCI_CAP_SMPS) ? " MPS":"", (ctlr->caps & AHCI_CAP_SSS) ? " SS":"", (ctlr->caps & AHCI_CAP_SALP) ? " ALP":"", (ctlr->caps & AHCI_CAP_SAL) ? " AL":"", (ctlr->caps & AHCI_CAP_SCLO) ? " CLO":"", ((speed == 1) ? "1.5":((speed == 2) ? "3": ((speed == 3) ? "6":"?")))); printf("%s%s%s%s%s%s %dcmd%s%s%s %dports\n", (ctlr->caps & AHCI_CAP_SAM) ? " AM":"", (ctlr->caps & AHCI_CAP_SPM) ? " PM":"", (ctlr->caps & AHCI_CAP_FBSS) ? " FBS":"", (ctlr->caps & AHCI_CAP_PMD) ? " PMD":"", (ctlr->caps & AHCI_CAP_SSC) ? " SSC":"", (ctlr->caps & AHCI_CAP_PSC) ? " PSC":"", ((ctlr->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1, (ctlr->caps & AHCI_CAP_CCCS) ? " CCC":"", (ctlr->caps & AHCI_CAP_EMS) ? " EM":"", (ctlr->caps & AHCI_CAP_SXS) ? " eSATA":"", (ctlr->caps & AHCI_CAP_NPMASK) + 1); } if (bootverbose && version >= 0x00010200) { device_printf(dev, "Caps2:%s%s%s%s%s%s\n", (ctlr->caps2 & AHCI_CAP2_DESO) ? " DESO":"", (ctlr->caps2 & AHCI_CAP2_SADM) ? " SADM":"", (ctlr->caps2 & AHCI_CAP2_SDS) ? " SDS":"", (ctlr->caps2 & AHCI_CAP2_APST) ? " APST":"", (ctlr->caps2 & AHCI_CAP2_NVMP) ? " NVMP":"", (ctlr->caps2 & AHCI_CAP2_BOH) ? " BOH":""); } /* Attach all channels on this controller */ for (unit = 0; unit < ctlr->channels; unit++) { child = device_add_child(dev, "ahcich", -1); if (child == NULL) { device_printf(dev, "failed to add channel device\n"); continue; } device_set_ivars(child, (void *)(intptr_t)unit); if ((ctlr->ichannels & (1 << unit)) == 0) device_disable(child); } if (ctlr->caps & AHCI_CAP_EMS) { child = device_add_child(dev, "ahciem", -1); if (child == NULL) device_printf(dev, "failed to add enclosure device\n"); else device_set_ivars(child, (void *)(intptr_t)-1); } bus_generic_attach(dev); return (0); } int ahci_detach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int i; /* Detach & delete all children */ device_delete_children(dev); /* Free interrupts. 
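 *
 * (Aside: the `for (u = ctlr->ichannels; u != 0; u >>= 1) i += (u & 1);'
 * loop in ahci_attach() above is an open-coded population count of the
 * implemented-ports mask.  An equivalent hypothetical helper, using
 * Kernighan's trick to iterate once per set bit instead:
 *
 *	static int
 *	popcount32(uint32_t v)
 *	{
 *		int n;
 *
 *		for (n = 0; v != 0; v &= v - 1)	// clears the lowest set bit
 *			n++;
 *		return (n);
 *	}
 * )
 *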
*/ for (i = 0; i < ctlr->numirqs; i++) { if (ctlr->irqs[i].r_irq) { bus_teardown_intr(dev, ctlr->irqs[i].r_irq, ctlr->irqs[i].handle); bus_release_resource(dev, SYS_RES_IRQ, ctlr->irqs[i].r_irq_rid, ctlr->irqs[i].r_irq); } } bus_dma_tag_destroy(ctlr->dma_tag); /* Free memory. */ rman_fini(&ctlr->sc_iomem); ahci_free_mem(dev); return (0); } void ahci_free_mem(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); /* Release memory resources */ if (ctlr->r_mem) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); if (ctlr->r_msix_table) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_msix_tab_rid, ctlr->r_msix_table); if (ctlr->r_msix_pba) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_msix_pba_rid, ctlr->r_msix_pba); ctlr->r_msix_pba = ctlr->r_mem = ctlr->r_msix_table = NULL; } int ahci_setup_interrupt(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int i; /* Check for single MSI vector fallback. */ if (ctlr->numirqs > 1 && (ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_MRSM) != 0) { device_printf(dev, "Falling back to one MSI\n"); ctlr->numirqs = 1; } /* Ensure we don't overrun irqs. */ if (ctlr->numirqs > AHCI_MAX_IRQS) { device_printf(dev, "Too many irqs %d > %d (clamping)\n", ctlr->numirqs, AHCI_MAX_IRQS); ctlr->numirqs = AHCI_MAX_IRQS; } /* Allocate all IRQs. */ for (i = 0; i < ctlr->numirqs; i++) { ctlr->irqs[i].ctlr = ctlr; ctlr->irqs[i].r_irq_rid = i + (ctlr->msi ? 1 : 0); if (ctlr->channels == 1 && !ctlr->ccc && ctlr->msi) ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE; else if (ctlr->numirqs == 1 || i >= ctlr->channels || (ctlr->ccc && i == ctlr->cccv)) ctlr->irqs[i].mode = AHCI_IRQ_MODE_ALL; else if (ctlr->channels > ctlr->numirqs && i == ctlr->numirqs - 1) ctlr->irqs[i].mode = AHCI_IRQ_MODE_AFTER; else ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE; if (!(ctlr->irqs[i].r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ctlr->irqs[i].r_irq_rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "unable to map interrupt\n"); return (ENXIO); } if ((bus_setup_intr(dev, ctlr->irqs[i].r_irq, ATA_INTR_FLAGS, NULL, (ctlr->irqs[i].mode != AHCI_IRQ_MODE_ONE) ? ahci_intr : ((ctlr->quirks & AHCI_Q_EDGEIS) ? ahci_intr_one_edge : ahci_intr_one), &ctlr->irqs[i], &ctlr->irqs[i].handle))) { /* SOS XXX release r_irq */ device_printf(dev, "unable to setup interrupt\n"); return (ENXIO); } if (ctlr->numirqs > 1) { bus_describe_intr(dev, ctlr->irqs[i].r_irq, ctlr->irqs[i].handle, ctlr->irqs[i].mode == AHCI_IRQ_MODE_ONE ? "ch%d" : "%d", i); } } return (0); } /* * Common case interrupt handler. */ static void ahci_intr(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; u_int32_t is, ise = 0; void *arg; int unit; if (irq->mode == AHCI_IRQ_MODE_ALL) { unit = 0; if (ctlr->ccc) is = ctlr->ichannels; else is = ATA_INL(ctlr->r_mem, AHCI_IS); } else { /* AHCI_IRQ_MODE_AFTER */ unit = irq->r_irq_rid - 1; is = ATA_INL(ctlr->r_mem, AHCI_IS); is &= (0xffffffff << unit); } /* CCC interrupt is edge triggered. */ if (ctlr->ccc) ise = 1 << ctlr->cccv; /* Some controllers have edge triggered IS. */ if (ctlr->quirks & AHCI_Q_EDGEIS) ise |= is; if (ise != 0) ATA_OUTL(ctlr->r_mem, AHCI_IS, ise); for (; unit < ctlr->channels; unit++) { if ((is & (1 << unit)) != 0 && (arg = ctlr->interrupt[unit].argument)) { ctlr->interrupt[unit].function(arg); } } /* AHCI declares level triggered IS. 
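 *
 * (Aside: the trailing ATA_RBL() read-back below is presumably the common
 * MMIO idiom of forcing a posted write out to the device before the
 * handler returns.  In isolation, with hypothetical register names:
 *
 *	ATA_OUTL(mem, REG_IS, bits);	// clear status; write may be posted
 *	(void) ATA_RBL(mem, REG_IS);	// read back to push it to hardware
 * )
 *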
*/ if (!(ctlr->quirks & AHCI_Q_EDGEIS)) ATA_OUTL(ctlr->r_mem, AHCI_IS, is); ATA_RBL(ctlr->r_mem, AHCI_IS); } /* * Simplified interrupt handler for multivector MSI mode. */ static void ahci_intr_one(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; void *arg; int unit; unit = irq->r_irq_rid - 1; if ((arg = ctlr->interrupt[unit].argument)) ctlr->interrupt[unit].function(arg); /* AHCI declares level triggered IS. */ ATA_OUTL(ctlr->r_mem, AHCI_IS, 1 << unit); ATA_RBL(ctlr->r_mem, AHCI_IS); } static void ahci_intr_one_edge(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; void *arg; int unit; unit = irq->r_irq_rid - 1; /* Some controllers have edge triggered IS. */ ATA_OUTL(ctlr->r_mem, AHCI_IS, 1 << unit); if ((arg = ctlr->interrupt[unit].argument)) ctlr->interrupt[unit].function(arg); ATA_RBL(ctlr->r_mem, AHCI_IS); } struct resource * ahci_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct ahci_controller *ctlr = device_get_softc(dev); struct resource *res; rman_res_t st; int offset, size, unit; unit = (intptr_t)device_get_ivars(child); res = NULL; switch (type) { case SYS_RES_MEMORY: if (unit >= 0) { offset = AHCI_OFFSET + (unit << 7); size = 128; } else if (*rid == 0) { offset = AHCI_EM_CTL; size = 4; } else { offset = (ctlr->emloc & 0xffff0000) >> 14; size = (ctlr->emloc & 0x0000ffff) << 2; if (*rid != 1) { if (*rid == 2 && (ctlr->capsem & (AHCI_EM_XMT | AHCI_EM_SMB)) == 0) offset += size; else break; } } st = rman_get_start(ctlr->r_mem); res = rman_reserve_resource(&ctlr->sc_iomem, st + offset, st + offset + size - 1, size, RF_ACTIVE, child); if (res) { bus_space_handle_t bsh; bus_space_tag_t bst; bsh = rman_get_bushandle(ctlr->r_mem); bst = rman_get_bustag(ctlr->r_mem); bus_space_subregion(bst, bsh, offset, 128, &bsh); rman_set_bushandle(res, bsh); rman_set_bustag(res, bst); } break; case SYS_RES_IRQ: if (*rid == ATA_IRQ_RID) res = ctlr->irqs[0].r_irq; break; } return (res); } int ahci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { switch (type) { case SYS_RES_MEMORY: rman_release_resource(r); return (0); case SYS_RES_IRQ: if (rid != ATA_IRQ_RID) return (ENOENT); return (0); } return (EINVAL); } int ahci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_filter_t *filter, driver_intr_t *function, void *argument, void **cookiep) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child); if (filter != NULL) { printf("ahci.c: we cannot use a filter here\n"); return (EINVAL); } ctlr->interrupt[unit].function = function; ctlr->interrupt[unit].argument = argument; return (0); } int ahci_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child); ctlr->interrupt[unit].function = NULL; ctlr->interrupt[unit].argument = NULL; return (0); } int ahci_print_child(device_t dev, device_t child) { int retval, channel; retval = bus_print_child_header(dev, child); channel = (int)(intptr_t)device_get_ivars(child); if (channel >= 0) retval += printf(" at channel %d", channel); retval += bus_print_child_footer(dev, child); return (retval); } int ahci_child_location_str(device_t dev, device_t child, char *buf, size_t buflen) { int channel; channel = (int)(intptr_t)device_get_ivars(child); if (channel 
>= 0) snprintf(buf, buflen, "channel=%d", channel); return (0); } bus_dma_tag_t ahci_get_dma_tag(device_t dev, device_t child) { struct ahci_controller *ctlr = device_get_softc(dev); return (ctlr->dma_tag); } static int ahci_ch_probe(device_t dev) { device_set_desc_copy(dev, "AHCI channel"); return (BUS_PROBE_DEFAULT); } static int ahci_ch_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(device_get_parent(dev)); struct ahci_channel *ch = device_get_softc(dev); struct cam_devq *devq; int rid, error, i, sata_rev = 0; u_int32_t version; ch->dev = dev; ch->unit = (intptr_t)device_get_ivars(dev); ch->caps = ctlr->caps; ch->caps2 = ctlr->caps2; ch->start = ctlr->ch_start; ch->quirks = ctlr->quirks; ch->vendorid = ctlr->vendorid; ch->deviceid = ctlr->deviceid; ch->subvendorid = ctlr->subvendorid; ch->subdeviceid = ctlr->subdeviceid; ch->numslots = ((ch->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1; mtx_init(&ch->mtx, "AHCI channel lock", NULL, MTX_DEF); ch->pm_level = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "pm_level", &ch->pm_level); STAILQ_INIT(&ch->doneq); if (ch->pm_level > 3) callout_init_mtx(&ch->pm_timer, &ch->mtx, 0); callout_init_mtx(&ch->reset_timer, &ch->mtx, 0); /* JMicron external ports (0) sometimes limited */ if ((ctlr->quirks & AHCI_Q_SATA1_UNIT0) && ch->unit == 0) sata_rev = 1; if (ch->quirks & AHCI_Q_SATA2) sata_rev = 2; resource_int_value(device_get_name(dev), device_get_unit(dev), "sata_rev", &sata_rev); for (i = 0; i < 16; i++) { ch->user[i].revision = sata_rev; ch->user[i].mode = 0; ch->user[i].bytecount = 8192; ch->user[i].tags = ch->numslots; ch->user[i].caps = 0; ch->curr[i] = ch->user[i]; if (ch->pm_level) { ch->user[i].caps = CTS_SATA_CAPS_H_PMREQ | CTS_SATA_CAPS_H_APST | CTS_SATA_CAPS_D_PMREQ | CTS_SATA_CAPS_D_APST; } ch->user[i].caps |= CTS_SATA_CAPS_H_DMAAA | CTS_SATA_CAPS_H_AN; } rid = 0; if (!(ch->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE))) return (ENXIO); ch->chcaps = ATA_INL(ch->r_mem, AHCI_P_CMD); version = ATA_INL(ctlr->r_mem, AHCI_VS); if (version < 0x00010200 && (ctlr->caps & AHCI_CAP_FBSS)) ch->chcaps |= AHCI_P_CMD_FBSCP; if (ch->caps2 & AHCI_CAP2_SDS) ch->chscaps = ATA_INL(ch->r_mem, AHCI_P_DEVSLP); if (bootverbose) { device_printf(dev, "Caps:%s%s%s%s%s%s\n", (ch->chcaps & AHCI_P_CMD_HPCP) ? " HPCP":"", (ch->chcaps & AHCI_P_CMD_MPSP) ? " MPSP":"", (ch->chcaps & AHCI_P_CMD_CPD) ? " CPD":"", (ch->chcaps & AHCI_P_CMD_ESP) ? " ESP":"", (ch->chcaps & AHCI_P_CMD_FBSCP) ? " FBSCP":"", (ch->chscaps & AHCI_P_DEVSLP_DSP) ? " DSP":""); } ahci_dmainit(dev); ahci_slotsalloc(dev); mtx_lock(&ch->mtx); ahci_ch_init(dev); rid = ATA_IRQ_RID; if (!(ch->r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "Unable to map interrupt\n"); error = ENXIO; goto err0; } if ((bus_setup_intr(dev, ch->r_irq, ATA_INTR_FLAGS, NULL, ctlr->direct ? ahci_ch_intr_direct : ahci_ch_intr, ch, &ch->ih))) { device_printf(dev, "Unable to setup interrupt\n"); error = ENXIO; goto err1; } /* Create the device queue for our SIM. */ devq = cam_simq_alloc(ch->numslots); if (devq == NULL) { device_printf(dev, "Unable to allocate simq\n"); error = ENOMEM; goto err1; } /* Construct SIM entry */ ch->sim = cam_sim_alloc(ahciaction, ahcipoll, "ahcich", ch, device_get_unit(dev), (struct mtx *)&ch->mtx, min(2, ch->numslots), (ch->caps & AHCI_CAP_SNCQ) ? 
ch->numslots : 0, devq); if (ch->sim == NULL) { cam_simq_free(devq); device_printf(dev, "unable to allocate sim\n"); error = ENOMEM; goto err1; } if (xpt_bus_register(ch->sim, dev, 0) != CAM_SUCCESS) { device_printf(dev, "unable to register xpt bus\n"); error = ENXIO; goto err2; } if (xpt_create_path(&ch->path, /*periph*/NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { device_printf(dev, "unable to create path\n"); error = ENXIO; goto err3; } if (ch->pm_level > 3) { callout_reset(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8, ahci_ch_pm, ch); } mtx_unlock(&ch->mtx); return (0); err3: xpt_bus_deregister(cam_sim_path(ch->sim)); err2: cam_sim_free(ch->sim, /*free_devq*/TRUE); err1: bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); err0: bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_unlock(&ch->mtx); mtx_destroy(&ch->mtx); return (error); } static int ahci_ch_detach(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); xpt_async(AC_LOST_DEVICE, ch->path, NULL); /* Forget about reset. */ if (ch->resetting) { ch->resetting = 0; xpt_release_simq(ch->sim, TRUE); } xpt_free_path(ch->path); xpt_bus_deregister(cam_sim_path(ch->sim)); cam_sim_free(ch->sim, /*free_devq*/TRUE); mtx_unlock(&ch->mtx); if (ch->pm_level > 3) callout_drain(&ch->pm_timer); callout_drain(&ch->reset_timer); bus_teardown_intr(dev, ch->r_irq, ch->ih); bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); ahci_ch_deinit(dev); ahci_slotsfree(dev); ahci_dmafini(dev); bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_destroy(&ch->mtx); return (0); } static int ahci_ch_init(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); uint64_t work; /* Disable port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Setup work areas */ work = ch->dma.work_bus + AHCI_CL_OFFSET; ATA_OUTL(ch->r_mem, AHCI_P_CLB, work & 0xffffffff); ATA_OUTL(ch->r_mem, AHCI_P_CLBU, work >> 32); work = ch->dma.rfis_bus; ATA_OUTL(ch->r_mem, AHCI_P_FB, work & 0xffffffff); ATA_OUTL(ch->r_mem, AHCI_P_FBU, work >> 32); /* Activate the channel and power/spin up device */ ATA_OUTL(ch->r_mem, AHCI_P_CMD, (AHCI_P_CMD_ACTIVE | AHCI_P_CMD_POD | AHCI_P_CMD_SUD | ((ch->pm_level == 2 || ch->pm_level == 3) ? AHCI_P_CMD_ALPE : 0) | ((ch->pm_level > 2) ? AHCI_P_CMD_ASP : 0 ))); ahci_start_fr(ch); ahci_start(ch, 1); return (0); } static int ahci_ch_deinit(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); /* Disable port interrupts. */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Reset command register. */ ahci_stop(ch); ahci_stop_fr(ch); ATA_OUTL(ch->r_mem, AHCI_P_CMD, 0); /* Allow everything, including partial and slumber modes. */ ATA_OUTL(ch->r_mem, AHCI_P_SCTL, 0); /* Request slumber mode transition and give some time to get there. */ ATA_OUTL(ch->r_mem, AHCI_P_CMD, AHCI_P_CMD_SLUMBER); DELAY(100); /* Disable PHY. */ ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE); return (0); } static int ahci_ch_suspend(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); xpt_freeze_simq(ch->sim, 1); /* Forget about reset. 
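 *
 * (Aside: ahci_ch_init() above programs the 64-bit command-list and FIS
 * receive bases as low/high 32-bit halves.  The split, as a hypothetical
 * helper:
 *
 *	static void
 *	ahci_set_addr64(struct ahci_channel *ch, int lo, int hi, uint64_t a)
 *	{
 *		ATA_OUTL(ch->r_mem, lo, a & 0xffffffff);  // e.g. AHCI_P_CLB
 *		ATA_OUTL(ch->r_mem, hi, a >> 32);	  // e.g. AHCI_P_CLBU
 *	}
 * )
 *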
*/ if (ch->resetting) { ch->resetting = 0; callout_stop(&ch->reset_timer); xpt_release_simq(ch->sim, TRUE); } while (ch->oslots) msleep(ch, &ch->mtx, PRIBIO, "ahcisusp", hz/100); ahci_ch_deinit(dev); mtx_unlock(&ch->mtx); return (0); } static int ahci_ch_resume(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); ahci_ch_init(dev); ahci_reset(ch); xpt_release_simq(ch->sim, TRUE); mtx_unlock(&ch->mtx); return (0); } devclass_t ahcich_devclass; static device_method_t ahcich_methods[] = { DEVMETHOD(device_probe, ahci_ch_probe), DEVMETHOD(device_attach, ahci_ch_attach), DEVMETHOD(device_detach, ahci_ch_detach), DEVMETHOD(device_suspend, ahci_ch_suspend), DEVMETHOD(device_resume, ahci_ch_resume), DEVMETHOD_END }; static driver_t ahcich_driver = { "ahcich", ahcich_methods, sizeof(struct ahci_channel) }; DRIVER_MODULE(ahcich, ahci, ahcich_driver, ahcich_devclass, NULL, NULL); struct ahci_dc_cb_args { bus_addr_t maddr; int error; }; static void ahci_dmainit(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); struct ahci_dc_cb_args dcba; size_t rfsize; /* Command area. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1024, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, AHCI_WORK_SIZE, 1, AHCI_WORK_SIZE, 0, NULL, NULL, &ch->dma.work_tag)) goto error; if (bus_dmamem_alloc(ch->dma.work_tag, (void **)&ch->dma.work, BUS_DMA_ZERO, &ch->dma.work_map)) goto error; if (bus_dmamap_load(ch->dma.work_tag, ch->dma.work_map, ch->dma.work, AHCI_WORK_SIZE, ahci_dmasetupc_cb, &dcba, 0) || dcba.error) { bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map); goto error; } ch->dma.work_bus = dcba.maddr; /* FIS receive area. */ if (ch->chcaps & AHCI_P_CMD_FBSCP) rfsize = 4096; else rfsize = 256; if (bus_dma_tag_create(bus_get_dma_tag(dev), rfsize, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, rfsize, 1, rfsize, 0, NULL, NULL, &ch->dma.rfis_tag)) goto error; if (bus_dmamem_alloc(ch->dma.rfis_tag, (void **)&ch->dma.rfis, 0, &ch->dma.rfis_map)) goto error; if (bus_dmamap_load(ch->dma.rfis_tag, ch->dma.rfis_map, ch->dma.rfis, rfsize, ahci_dmasetupc_cb, &dcba, 0) || dcba.error) { bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map); goto error; } ch->dma.rfis_bus = dcba.maddr; /* Data area. 
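 *
 * (The command and FIS areas above follow the canonical busdma sequence:
 * create a tag describing the constraints, allocate memory against it,
 * then load the map to learn the bus address.  Schematically, with
 * hypothetical align/size values:
 *
 *	bus_dma_tag_t tag;
 *	bus_dmamap_t map;
 *	void *va;
 *	struct ahci_dc_cb_args cb;
 *
 *	if (bus_dma_tag_create(bus_get_dma_tag(dev), align, 0,
 *	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    size, 1, size, 0, NULL, NULL, &tag))
 *		goto error;				// 1: constraints
 *	if (bus_dmamem_alloc(tag, &va, BUS_DMA_ZERO, &map))
 *		goto error;				// 2: matching memory
 *	if (bus_dmamap_load(tag, map, va, size,
 *	    ahci_dmasetupc_cb, &cb, 0) || cb.error)
 *		goto error;				// 3: bus address
 *	// cb.maddr now holds the device-visible address
 *
 * The matching unwind on failure is what ahci_dmafini() below performs.)
 *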
*/ if (bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, AHCI_SG_ENTRIES * PAGE_SIZE * ch->numslots, AHCI_SG_ENTRIES, AHCI_PRD_MAX, 0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag)) { goto error; } return; error: device_printf(dev, "WARNING - DMA initialization failed\n"); ahci_dmafini(dev); } static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct ahci_dc_cb_args *dcba = (struct ahci_dc_cb_args *)xsc; if (!(dcba->error = error)) dcba->maddr = segs[0].ds_addr; } static void ahci_dmafini(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); if (ch->dma.data_tag) { bus_dma_tag_destroy(ch->dma.data_tag); ch->dma.data_tag = NULL; } if (ch->dma.rfis_bus) { bus_dmamap_unload(ch->dma.rfis_tag, ch->dma.rfis_map); bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map); ch->dma.rfis_bus = 0; ch->dma.rfis = NULL; } if (ch->dma.work_bus) { bus_dmamap_unload(ch->dma.work_tag, ch->dma.work_map); bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map); ch->dma.work_bus = 0; ch->dma.work = NULL; } if (ch->dma.work_tag) { bus_dma_tag_destroy(ch->dma.work_tag); ch->dma.work_tag = NULL; } } static void ahci_slotsalloc(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; /* Alloc and setup command/dma slots */ bzero(ch->slot, sizeof(ch->slot)); for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; slot->ch = ch; slot->slot = i; slot->state = AHCI_SLOT_EMPTY; slot->ccb = NULL; callout_init_mtx(&slot->timeout, &ch->mtx, 0); if (bus_dmamap_create(ch->dma.data_tag, 0, &slot->dma.data_map)) device_printf(ch->dev, "FAILURE - create data_map\n"); } } static void ahci_slotsfree(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; /* Free all dma slots */ for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; callout_drain(&slot->timeout); if (slot->dma.data_map) { bus_dmamap_destroy(ch->dma.data_tag, slot->dma.data_map); slot->dma.data_map = NULL; } } } static int ahci_phy_check_events(struct ahci_channel *ch, u_int32_t serr) { if (((ch->pm_level == 0) && (serr & ATA_SE_PHY_CHANGED)) || ((ch->pm_level != 0 || ch->listening) && (serr & ATA_SE_EXCHANGED))) { u_int32_t status = ATA_INL(ch->r_mem, AHCI_P_SSTS); union ccb *ccb; if (bootverbose) { if ((status & ATA_SS_DET_MASK) != ATA_SS_DET_NO_DEVICE) device_printf(ch->dev, "CONNECT requested\n"); else device_printf(ch->dev, "DISCONNECT requested\n"); } ahci_reset(ch); if ((ccb = xpt_alloc_ccb_nowait()) == NULL) return (0); if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); return (0); } xpt_rescan(ccb); return (1); } return (0); } static void ahci_cpd_check_events(struct ahci_channel *ch) { u_int32_t status; union ccb *ccb; device_t dev; if (ch->pm_level == 0) return; status = ATA_INL(ch->r_mem, AHCI_P_CMD); if ((status & AHCI_P_CMD_CPD) == 0) return; if (bootverbose) { dev = ch->dev; if (status & AHCI_P_CMD_CPS) { device_printf(dev, "COLD CONNECT requested\n"); } else device_printf(dev, "COLD DISCONNECT requested\n"); } ahci_reset(ch); if ((ccb = xpt_alloc_ccb_nowait()) == NULL) return; if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); return; } xpt_rescan(ccb); } static void ahci_notify_events(struct ahci_channel *ch, u_int32_t status) { struct cam_path *dpath; int i; if (ch->caps & 
AHCI_CAP_SSNTF)
		ATA_OUTL(ch->r_mem, AHCI_P_SNTF, status);
	if (bootverbose)
		device_printf(ch->dev, "SNTF 0x%04x\n", status);
	for (i = 0; i < 16; i++) {
		if ((status & (1 << i)) == 0)
			continue;
		if (xpt_create_path(&dpath, NULL,
		    xpt_path_path_id(ch->path), i, 0) == CAM_REQ_CMP) {
			xpt_async(AC_SCSI_AEN, dpath, NULL);
			xpt_free_path(dpath);
		}
	}
}

static void
ahci_done(struct ahci_channel *ch, union ccb *ccb)
{

	mtx_assert(&ch->mtx, MA_OWNED);
	if ((ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0 ||
	    ch->batch == 0) {
		xpt_done(ccb);
		return;
	}

	STAILQ_INSERT_TAIL(&ch->doneq, &ccb->ccb_h, sim_links.stqe);
}

static void
ahci_ch_intr(void *arg)
{
	struct ahci_channel *ch = (struct ahci_channel *)arg;
	uint32_t istatus;

	/* Read interrupt statuses. */
	istatus = ATA_INL(ch->r_mem, AHCI_P_IS);
-	if (istatus == 0)
-		return;

	mtx_lock(&ch->mtx);
	ahci_ch_intr_main(ch, istatus);
	mtx_unlock(&ch->mtx);
}

static void
ahci_ch_intr_direct(void *arg)
{
	struct ahci_channel *ch = (struct ahci_channel *)arg;
	struct ccb_hdr *ccb_h;
	uint32_t istatus;
	STAILQ_HEAD(, ccb_hdr) tmp_doneq = STAILQ_HEAD_INITIALIZER(tmp_doneq);

	/* Read interrupt statuses. */
	istatus = ATA_INL(ch->r_mem, AHCI_P_IS);
-	if (istatus == 0)
-		return;

	mtx_lock(&ch->mtx);
	ch->batch = 1;
	ahci_ch_intr_main(ch, istatus);
	ch->batch = 0;
	/*
	 * Prevent the possibility of issues caused by processing the queue
	 * while unlocked below by moving the contents to a local queue.
	 */
	STAILQ_CONCAT(&tmp_doneq, &ch->doneq);
	mtx_unlock(&ch->mtx);
	while ((ccb_h = STAILQ_FIRST(&tmp_doneq)) != NULL) {
		STAILQ_REMOVE_HEAD(&tmp_doneq, sim_links.stqe);
		xpt_done_direct((union ccb *)ccb_h);
	}
}

static void
ahci_ch_pm(void *arg)
{
	struct ahci_channel *ch = (struct ahci_channel *)arg;
	uint32_t work;

	if (ch->numrslots != 0)
		return;
	work = ATA_INL(ch->r_mem, AHCI_P_CMD);
	if (ch->pm_level == 4)
		work |= AHCI_P_CMD_PARTIAL;
	else
		work |= AHCI_P_CMD_SLUMBER;
	ATA_OUTL(ch->r_mem, AHCI_P_CMD, work);
}

static void
ahci_ch_intr_main(struct ahci_channel *ch, uint32_t istatus)
{
	uint32_t cstatus, serr = 0, sntf = 0, ok, err;
	enum ahci_err_type et;
	int i, ccs, port, reset = 0;

	/* Clear interrupt statuses. */
	ATA_OUTL(ch->r_mem, AHCI_P_IS, istatus);
	/* Read command statuses. */
	if (ch->numtslots != 0)
		cstatus = ATA_INL(ch->r_mem, AHCI_P_SACT);
	else
		cstatus = 0;
	if (ch->numrslots != ch->numtslots)
		cstatus |= ATA_INL(ch->r_mem, AHCI_P_CI);

	/*
	 * Read SNTF in one of possible ways.
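	 *
	 * For reference, each 256-byte per-port slot of the FBS receive
	 * area is laid out per the AHCI spec (offsets from the slot base;
	 * the scan below relies on the 0x58 offset and 256-byte stride):
	 *
	 *	+0x00	DMA Setup FIS
	 *	+0x20	PIO Setup FIS
	 *	+0x40	D2H Register FIS
	 *	+0x58	Set Device Bits FIS	(notify bit is fis[1] & 0x80)
	 *	+0x60	Unknown FIS
	 *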
*/ if ((istatus & AHCI_P_IX_SDB) && (ch->pm_present || ch->curr[0].atapi != 0)) { if (ch->caps & AHCI_CAP_SSNTF) sntf = ATA_INL(ch->r_mem, AHCI_P_SNTF); else if (ch->fbs_enabled) { u_int8_t *fis = ch->dma.rfis + 0x58; for (i = 0; i < 16; i++) { if (fis[1] & 0x80) { fis[1] &= 0x7f; sntf |= 1 << i; } fis += 256; } } else { u_int8_t *fis = ch->dma.rfis + 0x58; if (fis[1] & 0x80) sntf = (1 << (fis[1] & 0x0f)); } } /* Process PHY events */ if (istatus & (AHCI_P_IX_PC | AHCI_P_IX_PRC | AHCI_P_IX_OF | AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) { serr = ATA_INL(ch->r_mem, AHCI_P_SERR); if (serr) { ATA_OUTL(ch->r_mem, AHCI_P_SERR, serr); reset = ahci_phy_check_events(ch, serr); } } /* Process cold presence detection events */ if ((istatus & AHCI_P_IX_CPD) && !reset) ahci_cpd_check_events(ch); /* Process command errors */ if (istatus & (AHCI_P_IX_OF | AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) { ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; //device_printf(dev, "%s ERROR is %08x cs %08x ss %08x rs %08x tfd %02x serr %08x fbs %08x ccs %d\n", // __func__, istatus, cstatus, sstatus, ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), // serr, ATA_INL(ch->r_mem, AHCI_P_FBS), ccs); port = -1; if (ch->fbs_enabled) { uint32_t fbs = ATA_INL(ch->r_mem, AHCI_P_FBS); if (fbs & AHCI_P_FBS_SDE) { port = (fbs & AHCI_P_FBS_DWE) >> AHCI_P_FBS_DWE_SHIFT; } else { for (i = 0; i < 16; i++) { if (ch->numrslotspd[i] == 0) continue; if (port == -1) port = i; else if (port != i) { port = -2; break; } } } } err = ch->rslots & cstatus; } else { ccs = 0; err = 0; port = -1; } /* Complete all successful commands. */ ok = ch->rslots & ~cstatus; for (i = 0; i < ch->numslots; i++) { if ((ok >> i) & 1) ahci_end_transaction(&ch->slot[i], AHCI_ERR_NONE); } /* On error, complete the rest of commands with error statuses. */ if (err) { if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } for (i = 0; i < ch->numslots; i++) { /* XXX: reqests in loading state. */ if (((err >> i) & 1) == 0) continue; if (port >= 0 && ch->slot[i].ccb->ccb_h.target_id != port) continue; if (istatus & AHCI_P_IX_TFE) { if (port != -2) { /* Task File Error */ if (ch->numtslotspd[ ch->slot[i].ccb->ccb_h.target_id] == 0) { /* Untagged operation. */ if (i == ccs) et = AHCI_ERR_TFE; else et = AHCI_ERR_INNOCENT; } else { /* Tagged operation. */ et = AHCI_ERR_NCQ; } } else { et = AHCI_ERR_TFE; ch->fatalerr = 1; } } else if (istatus & AHCI_P_IX_IF) { if (ch->numtslots == 0 && i != ccs && port != -2) et = AHCI_ERR_INNOCENT; else et = AHCI_ERR_SATA; } else et = AHCI_ERR_INVALID; ahci_end_transaction(&ch->slot[i], et); } /* * We can't reinit port if there are some other * commands active, use resume to complete them. */ if (ch->rslots != 0 && !ch->recoverycmd) ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN | AHCI_P_FBS_DEC); } /* Process NOTIFY events */ if (sntf) ahci_notify_events(ch, sntf); } /* Must be called with channel locked. */ static int ahci_check_collision(struct ahci_channel *ch, union ccb *ccb) { int t = ccb->ccb_h.target_id; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { /* Tagged command while we have no supported tag free. 
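 *
 * A worked example of the mask test below, with hypothetical values:
 *
 *	uint32_t oslots = 0x0000001f;			// slots 0..4 occupied
 *	int tags = 5;					// device allows 5 tags
 *	uint32_t usable = 0xffffffffu >> (32 - tags);	// == 0x0000001f
 *	int full = ((~oslots) & usable) == 0;		// 1 -> hold the ccb
 *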
*/ if (((~ch->oslots) & (0xffffffff >> (32 - ch->curr[t].tags))) == 0) return (1); /* If we have FBS */ if (ch->fbs_enabled) { /* Tagged command while untagged are active. */ if (ch->numrslotspd[t] != 0 && ch->numtslotspd[t] == 0) return (1); } else { /* Tagged command while untagged are active. */ if (ch->numrslots != 0 && ch->numtslots == 0) return (1); /* Tagged command while tagged to other target is active. */ if (ch->numtslots != 0 && ch->taggedtarget != ccb->ccb_h.target_id) return (1); } } else { /* If we have FBS */ if (ch->fbs_enabled) { /* Untagged command while tagged are active. */ if (ch->numrslotspd[t] != 0 && ch->numtslotspd[t] != 0) return (1); } else { /* Untagged command while tagged are active. */ if (ch->numrslots != 0 && ch->numtslots != 0) return (1); } } if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT))) { /* Atomic command while anything active. */ if (ch->numrslots != 0) return (1); } /* We have some atomic command running. */ if (ch->aslots != 0) return (1); return (0); } /* Must be called with channel locked. */ static void ahci_begin_transaction(struct ahci_channel *ch, union ccb *ccb) { struct ahci_slot *slot; int tag, tags; /* Choose empty slot. */ tags = ch->numslots; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) tags = ch->curr[ccb->ccb_h.target_id].tags; if (ch->lastslot + 1 < tags) tag = ffs(~(ch->oslots >> (ch->lastslot + 1))); else tag = 0; if (tag == 0 || tag + ch->lastslot >= tags) tag = ffs(~ch->oslots) - 1; else tag += ch->lastslot; ch->lastslot = tag; /* Occupy chosen slot. */ slot = &ch->slot[tag]; slot->ccb = ccb; /* Stop PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3) callout_stop(&ch->pm_timer); /* Update channel stats. */ ch->oslots |= (1 << tag); ch->numrslots++; ch->numrslotspd[ccb->ccb_h.target_id]++; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ch->numtslots++; ch->numtslotspd[ccb->ccb_h.target_id]++; ch->taggedtarget = ccb->ccb_h.target_id; } if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT))) ch->aslots |= (1 << tag); if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { slot->state = AHCI_SLOT_LOADING; bus_dmamap_load_ccb(ch->dma.data_tag, slot->dma.data_map, ccb, ahci_dmasetprd, slot, 0); } else { slot->dma.nsegs = 0; ahci_execute_transaction(slot); } } /* Locked by busdma engine. */ static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct ahci_slot *slot = arg; struct ahci_channel *ch = slot->ch; struct ahci_cmd_tab *ctp; struct ahci_dma_prd *prd; int i; if (error) { device_printf(ch->dev, "DMA load error\n"); ahci_end_transaction(slot, AHCI_ERR_INVALID); return; } KASSERT(nsegs <= AHCI_SG_ENTRIES, ("too many DMA segment entries\n")); /* Get a piece of the workspace for this request */ ctp = (struct ahci_cmd_tab *) (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot)); /* Fill S/G table */ prd = &ctp->prd_tab[0]; for (i = 0; i < nsegs; i++) { prd[i].dba = htole64(segs[i].ds_addr); prd[i].dbc = htole32((segs[i].ds_len - 1) & AHCI_PRD_MASK); } slot->dma.nsegs = nsegs; bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, ((slot->ccb->ccb_h.flags & CAM_DIR_IN) ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE)); ahci_execute_transaction(slot); } /* Must be called with channel locked. 
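 *
 * (Aside: the S/G descriptors filled in ahci_dmasetprd() above are
 * little-endian per AHCI regardless of host byte order, hence the
 * htole64()/htole32() conversions.  One entry, schematically:
 *
 *	bus_dma_segment_t seg;		// hypothetical single segment
 *	struct ahci_dma_prd prd;
 *
 *	prd.dba = htole64(seg.ds_addr);			// data base address
 *	prd.dbc = htole32((seg.ds_len - 1) & AHCI_PRD_MASK); // bytes - 1
 * )
 *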
 */
static void
ahci_execute_transaction(struct ahci_slot *slot)
{
	struct ahci_channel *ch = slot->ch;
	struct ahci_cmd_tab *ctp;
	struct ahci_cmd_list *clp;
	union ccb *ccb = slot->ccb;
	int port = ccb->ccb_h.target_id & 0x0f;
	int fis_size, i, softreset;
	uint8_t *fis = ch->dma.rfis + 0x40;
	uint8_t val;

	/* Get a piece of the workspace for this request */
	ctp = (struct ahci_cmd_tab *)
	    (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot));
	/* Setup the FIS for this request */
	if (!(fis_size = ahci_setup_fis(ch, ctp, ccb, slot->slot))) {
		device_printf(ch->dev, "Setting up SATA FIS failed\n");
		ahci_end_transaction(slot, AHCI_ERR_INVALID);
		return;
	}
	/* Setup the command list entry */
	clp = (struct ahci_cmd_list *)
	    (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot));
	clp->cmd_flags = htole16(
	    (ccb->ccb_h.flags & CAM_DIR_OUT ? AHCI_CMD_WRITE : 0) |
	    (ccb->ccb_h.func_code == XPT_SCSI_IO ?
	    (AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH) : 0) |
	    (fis_size / sizeof(u_int32_t)) |
	    (port << 12));
	clp->prd_length = htole16(slot->dma.nsegs);
	/* Special handling for Soft Reset command. */
	if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
	    (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL)) {
		if (ccb->ataio.cmd.control & ATA_A_RESET) {
			softreset = 1;
			/* Kick controller into sane state */
			ahci_stop(ch);
			ahci_clo(ch);
			ahci_start(ch, 0);
			clp->cmd_flags |= AHCI_CMD_RESET | AHCI_CMD_CLR_BUSY;
		} else {
			softreset = 2;
			/* Prepare FIS receive area for check. */
			for (i = 0; i < 20; i++)
				fis[i] = 0xff;
		}
	} else
		softreset = 0;
	clp->bytecount = 0;
	clp->cmd_table_phys = htole64(ch->dma.work_bus + AHCI_CT_OFFSET +
	    (AHCI_CT_SIZE * slot->slot));
	bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map,
	    BUS_DMASYNC_PREREAD);
	/* Set ACTIVE bit for NCQ commands. */
	if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
	    (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) {
		ATA_OUTL(ch->r_mem, AHCI_P_SACT, 1 << slot->slot);
	}
	/* If FBS is enabled, set PMP port. */
	if (ch->fbs_enabled) {
		ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN |
		    (port << AHCI_P_FBS_DEV_SHIFT));
	}
	/* Issue command to the controller. */
	slot->state = AHCI_SLOT_RUNNING;
	ch->rslots |= (1 << slot->slot);
	ATA_OUTL(ch->r_mem, AHCI_P_CI, (1 << slot->slot));
	/* Device reset commands don't interrupt.  Poll them. */
	if (ccb->ccb_h.func_code == XPT_ATA_IO &&
	    (ccb->ataio.cmd.command == ATA_DEVICE_RESET || softreset)) {
		int count, timeout = ccb->ccb_h.timeout * 100;
		enum ahci_err_type et = AHCI_ERR_NONE;

		for (count = 0; count < timeout; count++) {
			DELAY(10);
			if (!(ATA_INL(ch->r_mem, AHCI_P_CI) &
			    (1 << slot->slot)))
				break;
			if ((ATA_INL(ch->r_mem, AHCI_P_TFD) & ATA_S_ERROR) &&
			    softreset != 1) {
#if 0
				device_printf(ch->dev,
				    "Poll error on slot %d, TFD: %04x\n",
				    slot->slot,
				    ATA_INL(ch->r_mem, AHCI_P_TFD));
#endif
				et = AHCI_ERR_TFE;
				break;
			}
			/* Workaround for ATI SB600/SB700 chipsets. */
			if (ccb->ccb_h.target_id == 15 &&
			    (ch->quirks & AHCI_Q_ATI_PMP_BUG) &&
			    (ATA_INL(ch->r_mem, AHCI_P_IS) & AHCI_P_IX_IPM)) {
				et = AHCI_ERR_TIMEOUT;
				break;
			}
		}

		/*
		 * Marvell HBAs with non-RAID firmware do not wait for
		 * readiness after soft reset, so we have to wait here.
		 * Marvell RAIDs do not have this problem, but instead
		 * sometimes forget to update FIS receive area, breaking
		 * this wait.
*/ if ((ch->quirks & AHCI_Q_NOBSYRES) == 0 && (ch->quirks & AHCI_Q_ATI_PMP_BUG) == 0 && softreset == 2 && et == AHCI_ERR_NONE) { for ( ; count < timeout; count++) { bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_POSTREAD); val = fis[2]; bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_PREREAD); if ((val & ATA_S_BUSY) == 0) break; DELAY(10); } } if (timeout && (count >= timeout)) { device_printf(ch->dev, "Poll timeout on slot %d port %d\n", slot->slot, port); device_printf(ch->dev, "is %08x cs %08x ss %08x " "rs %08x tfd %02x serr %08x cmd %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR), ATA_INL(ch->r_mem, AHCI_P_CMD)); et = AHCI_ERR_TIMEOUT; } /* Kick controller into sane state and enable FBS. */ if (softreset == 2) ch->eslots |= (1 << slot->slot); ahci_end_transaction(slot, et); return; } /* Start command execution timeout */ callout_reset_sbt(&slot->timeout, SBT_1MS * ccb->ccb_h.timeout / 2, 0, (timeout_t*)ahci_timeout, slot, 0); return; } /* Must be called with channel locked. */ static void ahci_process_timeout(struct ahci_channel *ch) { int i; mtx_assert(&ch->mtx, MA_OWNED); /* Handle the rest of commands. */ for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; ahci_end_transaction(&ch->slot[i], AHCI_ERR_TIMEOUT); } } /* Must be called with channel locked. */ static void ahci_rearm_timeout(struct ahci_channel *ch) { int i; mtx_assert(&ch->mtx, MA_OWNED); for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; /* Do we have a running request on slot? */ if (slot->state < AHCI_SLOT_RUNNING) continue; if ((ch->toslots & (1 << i)) == 0) continue; callout_reset_sbt(&slot->timeout, SBT_1MS * slot->ccb->ccb_h.timeout / 2, 0, (timeout_t*)ahci_timeout, slot, 0); } } /* Locked by callout mechanism. */ static void ahci_timeout(struct ahci_slot *slot) { struct ahci_channel *ch = slot->ch; device_t dev = ch->dev; uint32_t sstatus; int ccs; int i; /* Check for stale timeout. */ if (slot->state < AHCI_SLOT_RUNNING) return; /* Check if slot was not being executed last time we checked. */ if (slot->state < AHCI_SLOT_EXECUTING) { /* Check if slot started executing. */ sstatus = ATA_INL(ch->r_mem, AHCI_P_SACT); ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; if ((sstatus & (1 << slot->slot)) != 0 || ccs == slot->slot || ch->fbs_enabled || ch->wrongccs) slot->state = AHCI_SLOT_EXECUTING; else if ((ch->rslots & (1 << ccs)) == 0) { ch->wrongccs = 1; slot->state = AHCI_SLOT_EXECUTING; } callout_reset_sbt(&slot->timeout, SBT_1MS * slot->ccb->ccb_h.timeout / 2, 0, (timeout_t*)ahci_timeout, slot, 0); return; } device_printf(dev, "Timeout on slot %d port %d\n", slot->slot, slot->ccb->ccb_h.target_id & 0x0f); device_printf(dev, "is %08x cs %08x ss %08x rs %08x tfd %02x " "serr %08x cmd %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR), ATA_INL(ch->r_mem, AHCI_P_CMD)); /* Handle frozen command. 
*/ if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } if (!ch->fbs_enabled && !ch->wrongccs) { /* Without FBS we know real timeout source. */ ch->fatalerr = 1; /* Handle command with timeout. */ ahci_end_transaction(&ch->slot[slot->slot], AHCI_ERR_TIMEOUT); /* Handle the rest of commands. */ for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT); } } else { /* With FBS we wait for other commands timeout and pray. */ if (ch->toslots == 0) xpt_freeze_simq(ch->sim, 1); ch->toslots |= (1 << slot->slot); if ((ch->rslots & ~ch->toslots) == 0) ahci_process_timeout(ch); else device_printf(dev, " ... waiting for slots %08x\n", ch->rslots & ~ch->toslots); } } /* Must be called with channel locked. */ static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) { struct ahci_channel *ch = slot->ch; union ccb *ccb = slot->ccb; struct ahci_cmd_list *clp; int lastto; uint32_t sig; bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); clp = (struct ahci_cmd_list *) (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot)); /* Read result registers to the result struct * May be incorrect if several commands finished same time, * so read only when sure or have to. */ if (ccb->ccb_h.func_code == XPT_ATA_IO) { struct ata_res *res = &ccb->ataio.res; if ((et == AHCI_ERR_TFE) || (ccb->ataio.cmd.flags & CAM_ATAIO_NEEDRESULT)) { u_int8_t *fis = ch->dma.rfis + 0x40; bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_POSTREAD); if (ch->fbs_enabled) { fis += ccb->ccb_h.target_id * 256; res->status = fis[2]; res->error = fis[3]; } else { uint16_t tfd = ATA_INL(ch->r_mem, AHCI_P_TFD); res->status = tfd; res->error = tfd >> 8; } res->lba_low = fis[4]; res->lba_mid = fis[5]; res->lba_high = fis[6]; res->device = fis[7]; res->lba_low_exp = fis[8]; res->lba_mid_exp = fis[9]; res->lba_high_exp = fis[10]; res->sector_count = fis[12]; res->sector_count_exp = fis[13]; /* * Some weird controllers do not return signature in * FIS receive area. Read it from PxSIG register. */ if ((ch->quirks & AHCI_Q_ALTSIG) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) == 0) { sig = ATA_INL(ch->r_mem, AHCI_P_SIG); res->lba_high = sig >> 24; res->lba_mid = sig >> 16; res->lba_low = sig >> 8; res->sector_count = sig; } } else bzero(res, sizeof(*res)); if ((ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) == 0 && (ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && (ch->quirks & AHCI_Q_NOCOUNT) == 0) { ccb->ataio.resid = ccb->ataio.dxfer_len - le32toh(clp->bytecount); } } else { if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && (ch->quirks & AHCI_Q_NOCOUNT) == 0) { ccb->csio.resid = ccb->csio.dxfer_len - le32toh(clp->bytecount); } } if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, (ccb->ccb_h.flags & CAM_DIR_IN) ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ch->dma.data_tag, slot->dma.data_map); } if (et != AHCI_ERR_NONE) ch->eslots |= (1 << slot->slot); /* In case of error, freeze device for proper recovery. 
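 *
 * (Aside: the non-FBS result path above recovers the ATA shadow registers
 * from PxTFD, which packs status in the low byte and error in the next:
 *
 *	uint16_t tfd = ATA_INL(ch->r_mem, AHCI_P_TFD);
 *	uint8_t status = tfd & 0xff;	// ATA status register image
 *	uint8_t err = tfd >> 8;		// ATA error register image
 * )
 *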
*/ if ((et != AHCI_ERR_NONE) && (!ch->recoverycmd) && !(ccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(ccb->ccb_h.path, 1); ccb->ccb_h.status |= CAM_DEV_QFRZN; } /* Set proper result status. */ ccb->ccb_h.status &= ~CAM_STATUS_MASK; switch (et) { case AHCI_ERR_NONE: ccb->ccb_h.status |= CAM_REQ_CMP; if (ccb->ccb_h.func_code == XPT_SCSI_IO) ccb->csio.scsi_status = SCSI_STATUS_OK; break; case AHCI_ERR_INVALID: ch->fatalerr = 1; ccb->ccb_h.status |= CAM_REQ_INVALID; break; case AHCI_ERR_INNOCENT: ccb->ccb_h.status |= CAM_REQUEUE_REQ; break; case AHCI_ERR_TFE: case AHCI_ERR_NCQ: if (ccb->ccb_h.func_code == XPT_SCSI_IO) { ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND; } else { ccb->ccb_h.status |= CAM_ATA_STATUS_ERROR; } break; case AHCI_ERR_SATA: ch->fatalerr = 1; if (!ch->recoverycmd) { xpt_freeze_simq(ch->sim, 1); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_RELEASE_SIMQ; } ccb->ccb_h.status |= CAM_UNCOR_PARITY; break; case AHCI_ERR_TIMEOUT: if (!ch->recoverycmd) { xpt_freeze_simq(ch->sim, 1); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_RELEASE_SIMQ; } ccb->ccb_h.status |= CAM_CMD_TIMEOUT; break; default: ch->fatalerr = 1; ccb->ccb_h.status |= CAM_REQ_CMP_ERR; } /* Free slot. */ ch->oslots &= ~(1 << slot->slot); ch->rslots &= ~(1 << slot->slot); ch->aslots &= ~(1 << slot->slot); slot->state = AHCI_SLOT_EMPTY; slot->ccb = NULL; /* Update channel stats. */ ch->numrslots--; ch->numrslotspd[ccb->ccb_h.target_id]--; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ch->numtslots--; ch->numtslotspd[ccb->ccb_h.target_id]--; } /* Cancel timeout state if request completed normally. */ if (et != AHCI_ERR_TIMEOUT) { lastto = (ch->toslots == (1 << slot->slot)); ch->toslots &= ~(1 << slot->slot); if (lastto) xpt_release_simq(ch->sim, TRUE); } /* If it was first request of reset sequence and there is no error, * proceed to second request. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) && et == AHCI_ERR_NONE) { ccb->ataio.cmd.control &= ~ATA_A_RESET; ahci_begin_transaction(ch, ccb); return; } /* If it was our READ LOG command - process it. */ if (ccb->ccb_h.recovery_type == RECOVERY_READ_LOG) { ahci_process_read_log(ch, ccb); /* If it was our REQUEST SENSE command - process it. */ } else if (ccb->ccb_h.recovery_type == RECOVERY_REQUEST_SENSE) { ahci_process_request_sense(ch, ccb); /* If it was NCQ or ATAPI command error, put result on hold. */ } else if (et == AHCI_ERR_NCQ || ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_SCSI_STATUS_ERROR && (ccb->ccb_h.flags & CAM_DIS_AUTOSENSE) == 0)) { ch->hold[slot->slot] = ccb; ch->numhslots++; } else ahci_done(ch, ccb); /* If we have no other active commands, ... */ if (ch->rslots == 0) { /* if there was fatal error - reset port. */ if (ch->toslots != 0 || ch->fatalerr) { ahci_reset(ch); } else { /* if we have slots in error, we can reinit port. */ if (ch->eslots != 0) { ahci_stop(ch); ahci_clo(ch); ahci_start(ch, 1); } /* if there commands on hold, we can do READ LOG. */ if (!ch->recoverycmd && ch->numhslots) ahci_issue_recovery(ch); } /* If all the rest of commands are in timeout - give them chance. */ } else if ((ch->rslots & ~ch->toslots) == 0 && et != AHCI_ERR_TIMEOUT) ahci_rearm_timeout(ch); /* Unfreeze frozen command. 
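
After the status mapping, the slot is retired by clearing its bit in each channel mask and adjusting the counters. A trimmed sketch of that bookkeeping, with struct chan_state standing in for the relevant ahci_channel fields:

#include <stdint.h>

struct chan_state {
        uint32_t oslots;        // occupied slots
        uint32_t rslots;        // running slots
        uint32_t aslots;        // slots holding atomic (non-NCQ) commands
        int numrslots;          // running-command counter
};

static void
slot_release(struct chan_state *ch, int slot)
{
        uint32_t bit = 1u << slot;

        ch->oslots &= ~bit;
        ch->rslots &= ~bit;
        ch->aslots &= ~bit;
        ch->numrslots--;
}

Keeping the masks consistent matters because the later decisions in this function are pure mask comparisons: "no other active commands" is ch->rslots == 0, and "everything left has timed out" is (ch->rslots & ~ch->toslots) == 0.
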
*/ if (ch->frozen && !ahci_check_collision(ch, ch->frozen)) { union ccb *fccb = ch->frozen; ch->frozen = NULL; ahci_begin_transaction(ch, fccb); xpt_release_simq(ch->sim, TRUE); } /* Start PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3 && (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) { callout_schedule(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8); } } static void ahci_issue_recovery(struct ahci_channel *ch) { union ccb *ccb; struct ccb_ataio *ataio; struct ccb_scsiio *csio; int i; /* Find some held command. */ for (i = 0; i < ch->numslots; i++) { if (ch->hold[i]) break; } ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { device_printf(ch->dev, "Unable to allocate recovery command\n"); completeall: /* We can't do anything -- complete held commands. */ for (i = 0; i < ch->numslots; i++) { if (ch->hold[i] == NULL) continue; ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_RESRC_UNAVAIL; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } ahci_reset(ch); return; } ccb->ccb_h = ch->hold[i]->ccb_h; /* Reuse old header. */ if (ccb->ccb_h.func_code == XPT_ATA_IO) { /* READ LOG */ ccb->ccb_h.recovery_type = RECOVERY_READ_LOG; ccb->ccb_h.func_code = XPT_ATA_IO; ccb->ccb_h.flags = CAM_DIR_IN; ccb->ccb_h.timeout = 1000; /* 1s should be enough. */ ataio = &ccb->ataio; ataio->data_ptr = malloc(512, M_AHCI, M_NOWAIT); if (ataio->data_ptr == NULL) { xpt_free_ccb(ccb); device_printf(ch->dev, "Unable to allocate memory for READ LOG command\n"); goto completeall; } ataio->dxfer_len = 512; bzero(&ataio->cmd, sizeof(ataio->cmd)); ataio->cmd.flags = CAM_ATAIO_48BIT; ataio->cmd.command = 0x2F; /* READ LOG EXT */ ataio->cmd.sector_count = 1; ataio->cmd.sector_count_exp = 0; ataio->cmd.lba_low = 0x10; ataio->cmd.lba_mid = 0; ataio->cmd.lba_mid_exp = 0; } else { /* REQUEST SENSE */ ccb->ccb_h.recovery_type = RECOVERY_REQUEST_SENSE; ccb->ccb_h.recovery_slot = i; ccb->ccb_h.func_code = XPT_SCSI_IO; ccb->ccb_h.flags = CAM_DIR_IN; ccb->ccb_h.status = 0; ccb->ccb_h.timeout = 1000; /* 1s should be enough. */ csio = &ccb->csio; csio->data_ptr = (void *)&ch->hold[i]->csio.sense_data; csio->dxfer_len = ch->hold[i]->csio.sense_len; csio->cdb_len = 6; bzero(&csio->cdb_io, sizeof(csio->cdb_io)); csio->cdb_io.cdb_bytes[0] = 0x03; csio->cdb_io.cdb_bytes[4] = csio->dxfer_len; } /* Freeze SIM while doing recovery. 
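
The recovery CCB built above for the ATA case is a READ LOG EXT of log address 0x10, the NCQ Command Error log, which is the standard way to learn which queued command failed after an NCQ error. A compact sketch of the same field values, with struct ata_cmd_sketch as a simplified stand-in for the ataio command block:

#include <stdint.h>
#include <string.h>

struct ata_cmd_sketch {
        uint8_t command;
        uint8_t flags_48bit;            // stand-in for CAM_ATAIO_48BIT
        uint8_t sector_count, sector_count_exp;
        uint8_t lba_low, lba_mid, lba_mid_exp;
};

static void
build_read_log_ext(struct ata_cmd_sketch *cmd)
{
        memset(cmd, 0, sizeof(*cmd));
        cmd->flags_48bit = 1;
        cmd->command = 0x2F;            // READ LOG EXT
        cmd->sector_count = 1;          // one 512-byte log sector
        cmd->sector_count_exp = 0;
        cmd->lba_low = 0x10;            // log address 0x10: NCQ error log
        cmd->lba_mid = 0;
        cmd->lba_mid_exp = 0;
}
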
*/ ch->recoverycmd = 1; xpt_freeze_simq(ch->sim, 1); ahci_begin_transaction(ch, ccb); } static void ahci_process_read_log(struct ahci_channel *ch, union ccb *ccb) { uint8_t *data; struct ata_res *res; int i; ch->recoverycmd = 0; data = ccb->ataio.data_ptr; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP && (data[0] & 0x80) == 0) { for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; if (ch->hold[i]->ccb_h.func_code != XPT_ATA_IO) continue; if ((data[0] & 0x1F) == i) { res = &ch->hold[i]->ataio.res; res->status = data[2]; res->error = data[3]; res->lba_low = data[4]; res->lba_mid = data[5]; res->lba_high = data[6]; res->device = data[7]; res->lba_low_exp = data[8]; res->lba_mid_exp = data[9]; res->lba_high_exp = data[10]; res->sector_count = data[12]; res->sector_count_exp = data[13]; } else { ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_REQUEUE_REQ; } ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } } else { if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) device_printf(ch->dev, "Error while READ LOG EXT\n"); else if ((data[0] & 0x80) == 0) { device_printf(ch->dev, "Non-queued command error in READ LOG EXT\n"); } for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; if (ch->hold[i]->ccb_h.func_code != XPT_ATA_IO) continue; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } } free(ccb->ataio.data_ptr, M_AHCI); xpt_free_ccb(ccb); xpt_release_simq(ch->sim, TRUE); } static void ahci_process_request_sense(struct ahci_channel *ch, union ccb *ccb) { int i; ch->recoverycmd = 0; i = ccb->ccb_h.recovery_slot; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { ch->hold[i]->ccb_h.status |= CAM_AUTOSNS_VALID; } else { ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_AUTOSENSE_FAIL; } ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; xpt_free_ccb(ccb); xpt_release_simq(ch->sim, TRUE); } static void ahci_start(struct ahci_channel *ch, int fbs) { u_int32_t cmd; /* Run the channel start callback, if any. */ if (ch->start) ch->start(ch); /* Clear SATA error register */ ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xFFFFFFFF); /* Clear any interrupts pending on this channel */ ATA_OUTL(ch->r_mem, AHCI_P_IS, 0xFFFFFFFF); /* Configure FIS-based switching if supported. */ if (ch->chcaps & AHCI_P_CMD_FBSCP) { ch->fbs_enabled = (fbs && ch->pm_present) ? 1 : 0; ATA_OUTL(ch->r_mem, AHCI_P_FBS, ch->fbs_enabled ? AHCI_P_FBS_EN : 0); } /* Start operations on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); cmd &= ~AHCI_P_CMD_PMA; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_ST | (ch->pm_present ? AHCI_P_CMD_PMA : 0)); } static void ahci_stop(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Kill all activity on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_ST); /* Wait for activity stop. 
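
ahci_process_read_log() above keys off byte 0 of the returned log page: bit 7 is the NQ flag (the error came from a non-queued command, so no tag can be recovered) and bits 4:0 carry the tag of the failed queued command. A small sketch of that decoding:

#include <stdint.h>

#define NCQ_LOG_NQ      0x80
#define NCQ_LOG_TAGMASK 0x1F

// Returns the failing tag (0..31), or -1 if the log does not identify
// a queued command.
static int
ncq_error_log_tag(const uint8_t *data)
{
        if (data[0] & NCQ_LOG_NQ)
                return (-1);
        return (data[0] & NCQ_LOG_TAGMASK);
}
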
*/ timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "stopping AHCI engine failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CR); ch->eslots = 0; } static void ahci_clo(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Issue Command List Override if supported */ if (ch->caps & AHCI_CAP_SCLO) { cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); cmd |= AHCI_P_CMD_CLO; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd); timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "executing CLO failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CLO); } } static void ahci_stop_fr(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Kill all FIS reception on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_FRE); /* Wait for FIS reception stop. */ timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "stopping AHCI FR engine failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_FR); } static void ahci_start_fr(struct ahci_channel *ch) { u_int32_t cmd; /* Start FIS reception on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_FRE); } static int ahci_wait_ready(struct ahci_channel *ch, int t, int t0) { int timeout = 0; uint32_t val; while ((val = ATA_INL(ch->r_mem, AHCI_P_TFD)) & (ATA_S_BUSY | ATA_S_DRQ)) { if (timeout > t) { if (t != 0) { device_printf(ch->dev, "AHCI reset: device not ready after %dms " "(tfd = %08x)\n", MAX(t, 0) + t0, val); } return (EBUSY); } DELAY(1000); timeout++; } if (bootverbose) device_printf(ch->dev, "AHCI reset: device ready after %dms\n", timeout + t0); return (0); } static void ahci_reset_to(void *arg) { struct ahci_channel *ch = arg; if (ch->resetting == 0) return; ch->resetting--; if (ahci_wait_ready(ch, ch->resetting == 0 ? -1 : 0, (310 - ch->resetting) * 100) == 0) { ch->resetting = 0; ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); return; } if (ch->resetting == 0) { ahci_clo(ch); ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); return; } callout_schedule(&ch->reset_timer, hz / 10); } static void ahci_reset(struct ahci_channel *ch) { struct ahci_controller *ctlr = device_get_softc(device_get_parent(ch->dev)); int i; xpt_freeze_simq(ch->sim, 1); if (bootverbose) device_printf(ch->dev, "AHCI reset...\n"); /* Forget about previous reset. */ if (ch->resetting) { ch->resetting = 0; callout_stop(&ch->reset_timer); xpt_release_simq(ch->sim, TRUE); } /* Requeue freezed command. */ if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } /* Kill the engine and requeue all running commands. */ ahci_stop(ch); for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; /* XXX; Commands in loading state. 
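
ahci_stop(), ahci_clo() and ahci_stop_fr() all use the same poll-until-clear idiom: spin in 10 us steps on a PxCMD bit, giving up after 50000 iterations (about 500 ms). Factored out, it looks like the sketch below; read_reg() and delay_us() are hypothetical stand-ins for ATA_INL() and DELAY().

#include <stdint.h>

extern uint32_t read_reg(void);         // stand-in for ATA_INL(...)
extern void delay_us(int us);           // stand-in for DELAY()

// Returns 0 once 'bit' is clear, -1 on timeout.
static int
wait_bit_clear(uint32_t bit)
{
        int timeout = 0;

        while (read_reg() & bit) {
                delay_us(10);
                if (timeout++ > 50000)  // roughly 500 ms worst case
                        return (-1);
        }
        return (0);
}
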
*/ ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT); } for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } if (ch->toslots != 0) xpt_release_simq(ch->sim, TRUE); ch->eslots = 0; ch->toslots = 0; ch->wrongccs = 0; ch->fatalerr = 0; /* Tell the XPT about the event */ xpt_async(AC_BUS_RESET, ch->path, NULL); /* Disable port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Reset and reconnect PHY, */ if (!ahci_sata_phy_reset(ch)) { if (bootverbose) device_printf(ch->dev, "AHCI reset: device not found\n"); ch->devices = 0; /* Enable wanted port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, (((ch->pm_level != 0) ? AHCI_P_IX_CPD | AHCI_P_IX_MP : 0) | AHCI_P_IX_PRC | AHCI_P_IX_PC)); xpt_release_simq(ch->sim, TRUE); return; } if (bootverbose) device_printf(ch->dev, "AHCI reset: device found\n"); /* Wait for clearing busy status. */ if (ahci_wait_ready(ch, dumping ? 31000 : 0, 0)) { if (dumping) ahci_clo(ch); else ch->resetting = 310; } ch->devices = 1; /* Enable wanted port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, (((ch->pm_level != 0) ? AHCI_P_IX_CPD | AHCI_P_IX_MP : 0) | AHCI_P_IX_TFE | AHCI_P_IX_HBF | AHCI_P_IX_HBD | AHCI_P_IX_IF | AHCI_P_IX_OF | ((ch->pm_level == 0) ? AHCI_P_IX_PRC : 0) | AHCI_P_IX_PC | AHCI_P_IX_DP | AHCI_P_IX_UF | (ctlr->ccc ? 0 : AHCI_P_IX_SDB) | AHCI_P_IX_DS | AHCI_P_IX_PS | (ctlr->ccc ? 0 : AHCI_P_IX_DHR))); if (ch->resetting) callout_reset(&ch->reset_timer, hz / 10, ahci_reset_to, ch); else { ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); } } static int ahci_setup_fis(struct ahci_channel *ch, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag) { u_int8_t *fis = &ctp->cfis[0]; bzero(fis, 20); fis[0] = 0x27; /* host to device */ fis[1] = (ccb->ccb_h.target_id & 0x0f); if (ccb->ccb_h.func_code == XPT_SCSI_IO) { fis[1] |= 0x80; fis[2] = ATA_PACKET_CMD; if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && ch->curr[ccb->ccb_h.target_id].mode >= ATA_DMA) fis[3] = ATA_F_DMA; else { fis[5] = ccb->csio.dxfer_len; fis[6] = ccb->csio.dxfer_len >> 8; } fis[7] = ATA_D_LBA; fis[15] = ATA_A_4BIT; bcopy((ccb->ccb_h.flags & CAM_CDB_POINTER) ? 
ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes, ctp->acmd, ccb->csio.cdb_len); bzero(ctp->acmd + ccb->csio.cdb_len, 32 - ccb->csio.cdb_len); } else if ((ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) == 0) { fis[1] |= 0x80; fis[2] = ccb->ataio.cmd.command; fis[3] = ccb->ataio.cmd.features; fis[4] = ccb->ataio.cmd.lba_low; fis[5] = ccb->ataio.cmd.lba_mid; fis[6] = ccb->ataio.cmd.lba_high; fis[7] = ccb->ataio.cmd.device; fis[8] = ccb->ataio.cmd.lba_low_exp; fis[9] = ccb->ataio.cmd.lba_mid_exp; fis[10] = ccb->ataio.cmd.lba_high_exp; fis[11] = ccb->ataio.cmd.features_exp; if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { fis[12] = tag << 3; } else { fis[12] = ccb->ataio.cmd.sector_count; } fis[13] = ccb->ataio.cmd.sector_count_exp; fis[15] = ATA_A_4BIT; } else { fis[15] = ccb->ataio.cmd.control; } if (ccb->ataio.ata_flags & ATA_FLAG_AUX) { fis[16] = ccb->ataio.aux & 0xff; fis[17] = (ccb->ataio.aux >> 8) & 0xff; fis[18] = (ccb->ataio.aux >> 16) & 0xff; fis[19] = (ccb->ataio.aux >> 24) & 0xff; } return (20); } static int ahci_sata_connect(struct ahci_channel *ch) { u_int32_t status; int timeout, found = 0; /* Wait up to 100ms for "connect well" */ for (timeout = 0; timeout < 1000 ; timeout++) { status = ATA_INL(ch->r_mem, AHCI_P_SSTS); if ((status & ATA_SS_DET_MASK) != ATA_SS_DET_NO_DEVICE) found = 1; if (((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_ONLINE) && ((status & ATA_SS_SPD_MASK) != ATA_SS_SPD_NO_SPEED) && ((status & ATA_SS_IPM_MASK) == ATA_SS_IPM_ACTIVE)) break; if ((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_OFFLINE) { if (bootverbose) { device_printf(ch->dev, "SATA offline status=%08x\n", status); } return (0); } if (found == 0 && timeout >= 100) break; DELAY(100); } if (timeout >= 1000 || !found) { if (bootverbose) { device_printf(ch->dev, "SATA connect timeout time=%dus status=%08x\n", timeout * 100, status); } return (0); } if (bootverbose) { device_printf(ch->dev, "SATA connect time=%dus status=%08x\n", timeout * 100, status); } /* Clear SATA error register */ ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xffffffff); return (1); } static int ahci_sata_phy_reset(struct ahci_channel *ch) { int sata_rev; uint32_t val; if (ch->listening) { val = ATA_INL(ch->r_mem, AHCI_P_CMD); val |= AHCI_P_CMD_SUD; ATA_OUTL(ch->r_mem, AHCI_P_CMD, val); ch->listening = 0; } sata_rev = ch->user[ch->pm_present ? 15 : 0].revision; if (sata_rev == 1) val = ATA_SC_SPD_SPEED_GEN1; else if (sata_rev == 2) val = ATA_SC_SPD_SPEED_GEN2; else if (sata_rev == 3) val = ATA_SC_SPD_SPEED_GEN3; else val = 0; ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_RESET | val | ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER); DELAY(1000); ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_IDLE | val | ((ch->pm_level > 0) ? 0 : (ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER))); if (!ahci_sata_connect(ch)) { if (ch->caps & AHCI_CAP_SSS) { val = ATA_INL(ch->r_mem, AHCI_P_CMD); val &= ~AHCI_P_CMD_SUD; ATA_OUTL(ch->r_mem, AHCI_P_CMD, val); ch->listening = 1; } else if (ch->pm_level > 0) ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE); return (0); } return (1); } static int ahci_check_ids(struct ahci_channel *ch, union ccb *ccb) { if (ccb->ccb_h.target_id > ((ch->caps & AHCI_CAP_SPM) ? 
15 : 0)) { ccb->ccb_h.status = CAM_TID_INVALID; ahci_done(ch, ccb); return (-1); } if (ccb->ccb_h.target_lun != 0) { ccb->ccb_h.status = CAM_LUN_INVALID; ahci_done(ch, ccb); return (-1); } return (0); } static void ahciaction(struct cam_sim *sim, union ccb *ccb) { struct ahci_channel *ch; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("ahciaction func_code=%x\n", ccb->ccb_h.func_code)); ch = (struct ahci_channel *)cam_sim_softc(sim); switch (ccb->ccb_h.func_code) { /* Common cases first */ case XPT_ATA_IO: /* Execute the requested I/O operation */ case XPT_SCSI_IO: if (ahci_check_ids(ch, ccb)) return; if (ch->devices == 0 || (ch->pm_present == 0 && ccb->ccb_h.target_id > 0 && ccb->ccb_h.target_id < 15)) { ccb->ccb_h.status = CAM_SEL_TIMEOUT; break; } ccb->ccb_h.recovery_type = RECOVERY_NONE; /* Check for command collision. */ if (ahci_check_collision(ch, ccb)) { /* Freeze command. */ ch->frozen = ccb; /* We have only one frozen slot, so freeze simq also. */ xpt_freeze_simq(ch->sim, 1); return; } ahci_begin_transaction(ch, ccb); return; case XPT_EN_LUN: /* Enable LUN as a target */ case XPT_TARGET_IO: /* Execute target I/O request */ case XPT_ACCEPT_TARGET_IO: /* Accept Host Target Mode CDB */ case XPT_CONT_TARGET_IO: /* Continue Host Target I/O Connection*/ case XPT_ABORT: /* Abort the specified CCB */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_SET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; if (ahci_check_ids(ch, ccb)) return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else d = &ch->user[ccb->ccb_h.target_id]; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_REVISION) d->revision = cts->xport_specific.sata.revision; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_MODE) d->mode = cts->xport_specific.sata.mode; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_BYTECOUNT) d->bytecount = min(8192, cts->xport_specific.sata.bytecount); if (cts->xport_specific.sata.valid & CTS_SATA_VALID_TAGS) d->tags = min(ch->numslots, cts->xport_specific.sata.tags); if (cts->xport_specific.sata.valid & CTS_SATA_VALID_PM) ch->pm_present = cts->xport_specific.sata.pm_present; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_ATAPI) d->atapi = cts->xport_specific.sata.atapi; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_CAPS) d->caps = cts->xport_specific.sata.caps; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: /* Get default/user set transfer settings for the target */ { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; uint32_t status; if (ahci_check_ids(ch, ccb)) return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else d = &ch->user[ccb->ccb_h.target_id]; cts->protocol = PROTO_UNSPECIFIED; cts->protocol_version = PROTO_VERSION_UNSPECIFIED; cts->transport = XPORT_SATA; cts->transport_version = XPORT_VERSION_UNSPECIFIED; cts->proto_specific.valid = 0; cts->xport_specific.sata.valid = 0; if (cts->type == CTS_TYPE_CURRENT_SETTINGS && (ccb->ccb_h.target_id == 15 || (ccb->ccb_h.target_id == 0 && !ch->pm_present))) { status = ATA_INL(ch->r_mem, AHCI_P_SSTS) & ATA_SS_SPD_MASK; if (status & 0x0f0) { cts->xport_specific.sata.revision = (status & 0x0f0) >> 4; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; } cts->xport_specific.sata.caps = d->caps & CTS_SATA_CAPS_D; if (ch->pm_level) { if (ch->caps & (AHCI_CAP_PSC | AHCI_CAP_SSC)) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_PMREQ; if (ch->caps2 & 
AHCI_CAP2_APST) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_APST; } if ((ch->caps & AHCI_CAP_SNCQ) && (ch->quirks & AHCI_Q_NOAA) == 0) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_DMAAA; cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_AN; cts->xport_specific.sata.caps &= ch->user[ccb->ccb_h.target_id].caps; cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } else { cts->xport_specific.sata.revision = d->revision; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; cts->xport_specific.sata.caps = d->caps; cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } cts->xport_specific.sata.mode = d->mode; cts->xport_specific.sata.valid |= CTS_SATA_VALID_MODE; cts->xport_specific.sata.bytecount = d->bytecount; cts->xport_specific.sata.valid |= CTS_SATA_VALID_BYTECOUNT; cts->xport_specific.sata.pm_present = ch->pm_present; cts->xport_specific.sata.valid |= CTS_SATA_VALID_PM; cts->xport_specific.sata.tags = d->tags; cts->xport_specific.sata.valid |= CTS_SATA_VALID_TAGS; cts->xport_specific.sata.atapi = d->atapi; cts->xport_specific.sata.valid |= CTS_SATA_VALID_ATAPI; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: /* Reset the specified SCSI bus */ case XPT_RESET_DEV: /* Bus Device Reset the specified SCSI device */ ahci_reset(ch); ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_TERM_IO: /* Terminate the I/O process */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_PATH_INQ: /* Path routing inquiry */ { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; /* XXX??? */ cpi->hba_inquiry = PI_SDTR_ABLE; if (ch->caps & AHCI_CAP_SNCQ) cpi->hba_inquiry |= PI_TAG_ABLE; if (ch->caps & AHCI_CAP_SPM) cpi->hba_inquiry |= PI_SATAPM; cpi->target_sprt = 0; cpi->hba_misc = PIM_SEQSCAN | PIM_UNMAPPED | PIM_ATA_EXT; cpi->hba_eng_cnt = 0; if (ch->caps & AHCI_CAP_SPM) cpi->max_target = 15; else cpi->max_target = 0; cpi->max_lun = 0; cpi->initiator_id = 0; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "AHCI", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->transport = XPORT_SATA; cpi->transport_version = XPORT_VERSION_UNSPECIFIED; cpi->protocol = PROTO_ATA; cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; cpi->maxio = MAXPHYS; /* ATI SB600 can't handle 256 sectors with FPDMA (NCQ). */ if (ch->quirks & AHCI_Q_MAXIO_64K) cpi->maxio = min(cpi->maxio, 128 * 512); cpi->hba_vendor = ch->vendorid; cpi->hba_device = ch->deviceid; cpi->hba_subvendor = ch->subvendorid; cpi->hba_subdevice = ch->subdeviceid; cpi->ccb_h.status = CAM_REQ_CMP; break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } ahci_done(ch, ccb); } static void ahcipoll(struct cam_sim *sim) { struct ahci_channel *ch = (struct ahci_channel *)cam_sim_softc(sim); uint32_t istatus; /* Read interrupt statuses and process if any. 
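
The GET_TRAN_SETTINGS path above derives the negotiated SATA revision from the SPD field of PxSSTS: bits 7:4 hold the interface speed (1 = Gen1/1.5 Gbps, 2 = Gen2/3 Gbps, 3 = Gen3/6 Gbps), and zero means no established link. A one-function sketch of that extraction:

#include <stdint.h>

#define SSTS_SPD_MASK 0x0f0     // SPD field of the SStatus register

// Returns 1..3 for an established link, 0 otherwise.
static int
ssts_to_revision(uint32_t ssts)
{
        return ((ssts & SSTS_SPD_MASK) >> 4);
}
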
*/ istatus = ATA_INL(ch->r_mem, AHCI_P_IS); if (istatus != 0) ahci_ch_intr_main(ch, istatus); if (ch->resetting != 0 && (--ch->resetpolldiv <= 0 || !callout_pending(&ch->reset_timer))) { ch->resetpolldiv = 1000; ahci_reset_to(ch); } } MODULE_VERSION(ahci, 1); MODULE_DEPEND(ahci, cam, 1, 1, 1); Index: projects/clang391-import/sys/dev/ahci/ahci_pci.c =================================================================== --- projects/clang391-import/sys/dev/ahci/ahci_pci.c (revision 309262) +++ projects/clang391-import/sys/dev/ahci/ahci_pci.c (revision 309263) @@ -1,641 +1,648 @@ /*- * Copyright (c) 2009-2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ahci.h" static int force_ahci = 1; TUNABLE_INT("hw.ahci.force", &force_ahci); static const struct { uint32_t id; uint8_t rev; const char *name; int quirks; } ahci_ids[] = { {0x43801002, 0x00, "AMD SB600", AHCI_Q_NOMSI | AHCI_Q_ATI_PMP_BUG | AHCI_Q_MAXIO_64K}, {0x43901002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43911002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43921002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43931002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43941002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, /* Not sure SB8x0/SB9x0 needs this quirk. 
Be conservative though */ {0x43951002, 0x00, "AMD SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG}, {0x78001022, 0x00, "AMD Hudson-2", 0}, {0x78011022, 0x00, "AMD Hudson-2", 0}, {0x78021022, 0x00, "AMD Hudson-2", 0}, {0x78031022, 0x00, "AMD Hudson-2", 0}, {0x78041022, 0x00, "AMD Hudson-2", 0}, - {0x06111b21, 0x00, "ASMedia ASM2106", 0}, - {0x06121b21, 0x00, "ASMedia ASM1061", 0}, + {0x06011b21, 0x00, "ASMedia ASM1060", 0}, + {0x06021b21, 0x00, "ASMedia ASM1060", 0}, + {0x06111b21, 0x00, "ASMedia ASM1061", 0}, + {0x06121b21, 0x00, "ASMedia ASM1062", 0}, + {0x06201b21, 0x00, "ASMedia ASM106x", 0}, + {0x06211b21, 0x00, "ASMedia ASM106x", 0}, + {0x06221b21, 0x00, "ASMedia ASM106x", 0}, + {0x06241b21, 0x00, "ASMedia ASM106x", 0}, + {0x06251b21, 0x00, "ASMedia ASM106x", 0}, {0x26528086, 0x00, "Intel ICH6", AHCI_Q_NOFORCE}, {0x26538086, 0x00, "Intel ICH6M", AHCI_Q_NOFORCE}, {0x26818086, 0x00, "Intel ESB2", 0}, {0x26828086, 0x00, "Intel ESB2", 0}, {0x26838086, 0x00, "Intel ESB2", 0}, {0x27c18086, 0x00, "Intel ICH7", 0}, {0x27c38086, 0x00, "Intel ICH7", 0}, {0x27c58086, 0x00, "Intel ICH7M", 0}, {0x27c68086, 0x00, "Intel ICH7M", 0}, {0x28218086, 0x00, "Intel ICH8", 0}, {0x28228086, 0x00, "Intel ICH8", 0}, {0x28248086, 0x00, "Intel ICH8", 0}, {0x28298086, 0x00, "Intel ICH8M", 0}, {0x282a8086, 0x00, "Intel ICH8M", 0}, {0x29228086, 0x00, "Intel ICH9", 0}, {0x29238086, 0x00, "Intel ICH9", 0}, {0x29248086, 0x00, "Intel ICH9", 0}, {0x29258086, 0x00, "Intel ICH9", 0}, {0x29278086, 0x00, "Intel ICH9", 0}, {0x29298086, 0x00, "Intel ICH9M", 0}, {0x292a8086, 0x00, "Intel ICH9M", 0}, {0x292b8086, 0x00, "Intel ICH9M", 0}, {0x292c8086, 0x00, "Intel ICH9M", 0}, {0x292f8086, 0x00, "Intel ICH9M", 0}, {0x294d8086, 0x00, "Intel ICH9", 0}, {0x294e8086, 0x00, "Intel ICH9M", 0}, {0x3a058086, 0x00, "Intel ICH10", 0}, {0x3a228086, 0x00, "Intel ICH10", 0}, {0x3a258086, 0x00, "Intel ICH10", 0}, {0x3b228086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b238086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b258086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b298086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b2c8086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b2f8086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x1c028086, 0x00, "Intel Cougar Point", 0}, {0x1c038086, 0x00, "Intel Cougar Point", 0}, {0x1c048086, 0x00, "Intel Cougar Point", 0}, {0x1c058086, 0x00, "Intel Cougar Point", 0}, {0x1d028086, 0x00, "Intel Patsburg", 0}, {0x1d048086, 0x00, "Intel Patsburg", 0}, {0x1d068086, 0x00, "Intel Patsburg", 0}, {0x28268086, 0x00, "Intel Patsburg (RAID)", 0}, {0x1e028086, 0x00, "Intel Panther Point", 0}, {0x1e038086, 0x00, "Intel Panther Point", 0}, {0x1e048086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e058086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e068086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e078086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e0e8086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e0f8086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1f228086, 0x00, "Intel Avoton", 0}, {0x1f238086, 0x00, "Intel Avoton", 0}, {0x1f248086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f258086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f268086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f278086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f2e8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f2f8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f328086, 0x00, "Intel Avoton", 0}, {0x1f338086, 0x00, "Intel Avoton", 0}, {0x1f348086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f358086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f368086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f378086, 0x00, "Intel 
Avoton (RAID)", 0}, {0x1f3e8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f3f8086, 0x00, "Intel Avoton (RAID)", 0}, {0x23a38086, 0x00, "Intel Coleto Creek", 0}, {0x28238086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x28278086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8c028086, 0x00, "Intel Lynx Point", 0}, {0x8c038086, 0x00, "Intel Lynx Point", 0}, {0x8c048086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c058086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c068086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c078086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c0e8086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c0f8086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c828086, 0x00, "Intel Wildcat Point", 0}, {0x8c838086, 0x00, "Intel Wildcat Point", 0}, {0x8c848086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c858086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c868086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c878086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c8e8086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c8f8086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8d028086, 0x00, "Intel Wellsburg", 0}, {0x8d048086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d068086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d628086, 0x00, "Intel Wellsburg", 0}, {0x8d648086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d668086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d6e8086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x9c028086, 0x00, "Intel Lynx Point-LP", 0}, {0x9c038086, 0x00, "Intel Lynx Point-LP", 0}, {0x9c048086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c058086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c068086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c078086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c0e8086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c0f8086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9d038086, 0x00, "Intel Sunrise Point-LP", 0}, {0x9d058086, 0x00, "Intel Sunrise Point-LP (RAID)", 0}, {0x9d078086, 0x00, "Intel Sunrise Point-LP (RAID)", 0}, {0xa1028086, 0x00, "Intel Sunrise Point", 0}, {0xa1038086, 0x00, "Intel Sunrise Point", 0}, {0xa1058086, 0x00, "Intel Sunrise Point (RAID)", 0}, {0xa1068086, 0x00, "Intel Sunrise Point (RAID)", 0}, {0xa1078086, 0x00, "Intel Sunrise Point (RAID)", 0}, {0xa10f8086, 0x00, "Intel Sunrise Point (RAID)", 0}, {0x23238086, 0x00, "Intel DH89xxCC", 0}, {0x2360197b, 0x00, "JMicron JMB360", 0}, {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE | AHCI_Q_1CH}, {0x2362197b, 0x00, "JMicron JMB362", 0}, {0x2363197b, 0x00, "JMicron JMB363", AHCI_Q_NOFORCE}, {0x2365197b, 0x00, "JMicron JMB365", AHCI_Q_NOFORCE}, {0x2366197b, 0x00, "JMicron JMB366", AHCI_Q_NOFORCE}, {0x2368197b, 0x00, "JMicron JMB368", AHCI_Q_NOFORCE}, {0x611111ab, 0x00, "Marvell 88SE6111", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_1CH | AHCI_Q_EDGEIS}, {0x612111ab, 0x00, "Marvell 88SE6121", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_2CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x614111ab, 0x00, "Marvell 88SE6141", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_4CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x614511ab, 0x00, "Marvell 88SE6145", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_4CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x91201b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS}, {0x91231b4b, 0x11, "Marvell 88SE912x", AHCI_Q_ALTSIG}, {0x91231b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS|AHCI_Q_SATA2}, {0x91251b4b, 0x00, "Marvell 88SE9125", 0}, {0x91281b4b, 0x00, "Marvell 88SE9128", AHCI_Q_ALTSIG}, {0x91301b4b, 0x00, "Marvell 88SE9130", AHCI_Q_ALTSIG}, {0x91721b4b, 0x00, "Marvell 88SE9172", 0}, {0x91821b4b, 0x00, 
"Marvell 88SE9182", 0}, {0x91831b4b, 0x00, "Marvell 88SS9183", 0}, {0x91a01b4b, 0x00, "Marvell 88SE91Ax", 0}, {0x92151b4b, 0x00, "Marvell 88SE9215", 0}, {0x92201b4b, 0x00, "Marvell 88SE9220", AHCI_Q_ALTSIG}, {0x92301b4b, 0x00, "Marvell 88SE9230", AHCI_Q_ALTSIG}, {0x92351b4b, 0x00, "Marvell 88SE9235", 0}, {0x06201103, 0x00, "HighPoint RocketRAID 620", 0}, {0x06201b4b, 0x00, "HighPoint RocketRAID 620", 0}, {0x06221103, 0x00, "HighPoint RocketRAID 622", 0}, {0x06221b4b, 0x00, "HighPoint RocketRAID 622", 0}, {0x06401103, 0x00, "HighPoint RocketRAID 640", 0}, {0x06401b4b, 0x00, "HighPoint RocketRAID 640", 0}, {0x06441103, 0x00, "HighPoint RocketRAID 644", 0}, {0x06441b4b, 0x00, "HighPoint RocketRAID 644", 0}, {0x06411103, 0x00, "HighPoint RocketRAID 640L", 0}, {0x06421103, 0x00, "HighPoint RocketRAID 642L", 0}, {0x06451103, 0x00, "HighPoint RocketRAID 644L", 0}, {0x044c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x055010de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055110de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055210de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055310de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055510de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055610de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055710de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055810de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055910de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055A10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055B10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x058410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x07f010de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f110de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f210de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f310de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f410de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f510de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f610de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f710de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f810de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f910de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07fa10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07fb10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x0ad010de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad110de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad210de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad310de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad410de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad510de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad610de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad710de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad810de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad910de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ada10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0adb10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ab410de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab510de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab610de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab710de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab810de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab910de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0aba10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abb10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abc10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abd10de, 0x00, "NVIDIA MCP79", 
AHCI_Q_NOAA}, {0x0abe10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abf10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0d8410de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8510de, 0x00, "NVIDIA MCP89", AHCI_Q_NOFORCE|AHCI_Q_NOAA}, {0x0d8610de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8710de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8810de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8910de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8a10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8b10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8c10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8d10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8e10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8f10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x3781105a, 0x00, "Promise TX8660", 0}, {0x33491106, 0x00, "VIA VT8251", AHCI_Q_NOPMP|AHCI_Q_NONCQ}, {0x62871106, 0x00, "VIA VT8251", AHCI_Q_NOPMP|AHCI_Q_NONCQ}, {0x11841039, 0x00, "SiS 966", 0}, {0x11851039, 0x00, "SiS 968", 0}, {0x01861039, 0x00, "SiS 968", 0}, {0xa01c177d, 0x00, "ThunderX", AHCI_Q_ABAR0|AHCI_Q_1MSI}, {0x00311c36, 0x00, "Annapurna", AHCI_Q_FORCE_PI|AHCI_Q_RESTORE_CAP|AHCI_Q_NOMSIX}, {0x00000000, 0x00, NULL, 0} }; static int ahci_pci_ctlr_reset(device_t dev) { if (pci_read_config(dev, PCIR_DEVVENDOR, 4) == 0x28298086 && (pci_read_config(dev, 0x92, 1) & 0xfe) == 0x04) pci_write_config(dev, 0x92, 0x01, 1); return ahci_ctlr_reset(dev); } static int ahci_probe(device_t dev) { char buf[64]; int i, valid = 0; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); /* * Ensure it is not a PCI bridge (some vendors use * the same PID and VID in PCI bridge and AHCI cards). */ if (pci_get_class(dev) == PCIC_BRIDGE) return (ENXIO); /* Is this a possible AHCI candidate? */ if (pci_get_class(dev) == PCIC_STORAGE && pci_get_subclass(dev) == PCIS_STORAGE_SATA && pci_get_progif(dev) == PCIP_STORAGE_SATA_AHCI_1_0) valid = 1; else if (pci_get_class(dev) == PCIC_STORAGE && pci_get_subclass(dev) == PCIS_STORAGE_RAID) valid = 2; /* Is this a known AHCI chip? */ for (i = 0; ahci_ids[i].id != 0; i++) { if (ahci_ids[i].id == devid && ahci_ids[i].rev <= revid && (valid || (force_ahci == 1 && !(ahci_ids[i].quirks & AHCI_Q_NOFORCE)))) { /* Do not attach JMicrons with single PCI function. */ if (pci_get_vendor(dev) == 0x197b && (pci_read_config(dev, 0xdf, 1) & 0x40) == 0) return (ENXIO); snprintf(buf, sizeof(buf), "%s AHCI SATA controller", ahci_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } } if (valid != 1) return (ENXIO); device_set_desc_copy(dev, "AHCI SATA controller"); return (BUS_PROBE_DEFAULT); } static int ahci_ata_probe(device_t dev) { char buf[64]; int i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); if ((intptr_t)device_get_ivars(dev) >= 0) return (ENXIO); /* Is this a known AHCI chip? 
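
The ahci_ids[] matching in both probe routines relies on how pci_get_devid() packs its result: the PCI device ID occupies the upper 16 bits and the vendor ID the lower 16. So a table entry such as 0x43801002 means device 0x4380 from vendor 0x1002 (AMD/ATI). A runnable illustration:

#include <stdint.h>
#include <stdio.h>

static unsigned devid_device(uint32_t id) { return (id >> 16); }
static unsigned devid_vendor(uint32_t id) { return (id & 0xffff); }

int
main(void)
{
        uint32_t id = 0x43801002;       // the AMD SB600 entry above

        printf("vendor 0x%04x device 0x%04x\n",
            devid_vendor(id), devid_device(id));
        return (0);
}
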
*/ for (i = 0; ahci_ids[i].id != 0; i++) { if (ahci_ids[i].id == devid && ahci_ids[i].rev <= revid) { snprintf(buf, sizeof(buf), "%s AHCI SATA controller", ahci_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } } device_set_desc_copy(dev, "AHCI SATA controller"); return (BUS_PROBE_DEFAULT); } static int ahci_pci_read_msix_bars(device_t dev, uint8_t *table_bar, uint8_t *pba_bar) { int cap_offset = 0, ret; uint32_t val; if ((table_bar == NULL) || (pba_bar == NULL)) return (EINVAL); ret = pci_find_cap(dev, PCIY_MSIX, &cap_offset); if (ret != 0) return (EINVAL); val = pci_read_config(dev, cap_offset + PCIR_MSIX_TABLE, 4); *table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK); val = pci_read_config(dev, cap_offset + PCIR_MSIX_PBA, 4); *pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK); return (0); } static int ahci_pci_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int error, i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); int msi_count, msix_count; uint8_t table_bar = 0, pba_bar = 0; msi_count = pci_msi_count(dev); msix_count = pci_msix_count(dev); i = 0; while (ahci_ids[i].id != 0 && (ahci_ids[i].id != devid || ahci_ids[i].rev > revid)) i++; ctlr->quirks = ahci_ids[i].quirks; /* Limit speed for my onboard JMicron external port. * It is not eSATA really, limit to SATA 1 */ if (pci_get_devid(dev) == 0x2363197b && pci_get_subvendor(dev) == 0x1043 && pci_get_subdevice(dev) == 0x81e4) ctlr->quirks |= AHCI_Q_SATA1_UNIT0; resource_int_value(device_get_name(dev), device_get_unit(dev), "quirks", &ctlr->quirks); ctlr->vendorid = pci_get_vendor(dev); ctlr->deviceid = pci_get_device(dev); ctlr->subvendorid = pci_get_subvendor(dev); ctlr->subdeviceid = pci_get_subdevice(dev); /* Default AHCI Base Address is BAR(5), Cavium uses BAR(0) */ if (ctlr->quirks & AHCI_Q_ABAR0) ctlr->r_rid = PCIR_BAR(0); else ctlr->r_rid = PCIR_BAR(5); if (!(ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_rid, RF_ACTIVE))) return ENXIO; if (ctlr->quirks & AHCI_Q_NOMSIX) msix_count = 0; /* Read MSI-x BAR IDs if supported */ if (msix_count > 0) { error = ahci_pci_read_msix_bars(dev, &table_bar, &pba_bar); if (error == 0) { ctlr->r_msix_tab_rid = table_bar; ctlr->r_msix_pba_rid = pba_bar; } else { /* Failed to read BARs, disable MSI-x */ msix_count = 0; } } /* Allocate resources for MSI-x table and PBA */ if (msix_count > 0) { /* * Allocate new MSI-x table only if not * allocated before. */ ctlr->r_msix_table = NULL; if (ctlr->r_msix_tab_rid != ctlr->r_rid) { /* Separate BAR for MSI-x */ ctlr->r_msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_msix_tab_rid, RF_ACTIVE); if (ctlr->r_msix_table == NULL) { ahci_free_mem(dev); return (ENXIO); } } /* * Allocate new PBA table only if not * allocated before. */ ctlr->r_msix_pba = NULL; if ((ctlr->r_msix_pba_rid != ctlr->r_msix_tab_rid) && (ctlr->r_msix_pba_rid != ctlr->r_rid)) { /* Separate BAR for PBA */ ctlr->r_msix_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_msix_pba_rid, RF_ACTIVE); if (ctlr->r_msix_pba == NULL) { ahci_free_mem(dev); return (ENXIO); } } } pci_enable_busmaster(dev); /* Reset controller */ if ((error = ahci_pci_ctlr_reset(dev)) != 0) { ahci_free_mem(dev); return (error); } /* Setup interrupts. */ /* Setup MSI register parameters */ /* Process hints. 
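
ahci_pci_read_msix_bars() above decodes the standard MSI-X capability layout: the dword at the capability offset plus PCIR_MSIX_TABLE (and likewise PCIR_MSIX_PBA) carries the structure's offset together with a 3-bit BAR Indicator Register (BIR) in its low bits, and the BIR selects which BAR holds the structure. A sketch of just that decoding, with BIR_MASK and BAR_REG() as local stand-ins for PCIM_MSIX_BIR_MASK and PCIR_BAR():

#include <stdint.h>

#define BIR_MASK   0x7
#define BAR_REG(x) (0x10 + (x) * 4)     // config-space offset of BAR x

// Given the raw MSI-X table register value, return the config-space
// offset of the BAR that holds the vector table.
static uint32_t
msix_table_bar_reg(uint32_t table_reg)
{
        return (BAR_REG(table_reg & BIR_MASK));
}
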
*/ if (ctlr->quirks & AHCI_Q_NOMSI) ctlr->msi = 0; else if (ctlr->quirks & AHCI_Q_1MSI) ctlr->msi = 1; else ctlr->msi = 2; resource_int_value(device_get_name(dev), device_get_unit(dev), "msi", &ctlr->msi); ctlr->numirqs = 1; if (msi_count == 0 && msix_count == 0) ctlr->msi = 0; if (ctlr->msi < 0) ctlr->msi = 0; else if (ctlr->msi == 1) { msi_count = min(1, msi_count); msix_count = min(1, msix_count); } else if (ctlr->msi > 1) ctlr->msi = 2; /* Allocate MSI/MSI-x if needed/present. */ if (ctlr->msi > 0) { error = ENXIO; /* Try to allocate MSI-x first */ if (msix_count > 0) { error = pci_alloc_msix(dev, &msix_count); if (error == 0) ctlr->numirqs = msix_count; } /* * Try to allocate MSI if msi_count is greater than 0 * and if MSI-x allocation failed. */ if ((error != 0) && (msi_count > 0)) { error = pci_alloc_msi(dev, &msi_count); if (error == 0) ctlr->numirqs = msi_count; } /* Both MSI and MSI-x allocations failed */ if (error != 0) { ctlr->msi = 0; device_printf(dev, "Failed to allocate MSI/MSI-x, " "falling back to INTx\n"); } } error = ahci_attach(dev); if (error != 0) { if (ctlr->msi > 0) pci_release_msi(dev); ahci_free_mem(dev); } return error; } static int ahci_pci_detach(device_t dev) { ahci_detach(dev); pci_release_msi(dev); return (0); } static int ahci_pci_suspend(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); bus_generic_suspend(dev); /* Disable interupts, so the state change(s) doesn't trigger */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, ATA_INL(ctlr->r_mem, AHCI_GHC) & (~AHCI_GHC_IE)); return 0; } static int ahci_pci_resume(device_t dev) { int res; if ((res = ahci_pci_ctlr_reset(dev)) != 0) return (res); ahci_ctlr_setup(dev); return (bus_generic_resume(dev)); } devclass_t ahci_devclass; static device_method_t ahci_methods[] = { DEVMETHOD(device_probe, ahci_probe), DEVMETHOD(device_attach, ahci_pci_attach), DEVMETHOD(device_detach, ahci_pci_detach), DEVMETHOD(device_suspend, ahci_pci_suspend), DEVMETHOD(device_resume, ahci_pci_resume), DEVMETHOD(bus_print_child, ahci_print_child), DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), DEVMETHOD(bus_release_resource, ahci_release_resource), DEVMETHOD(bus_setup_intr, ahci_setup_intr), DEVMETHOD(bus_teardown_intr,ahci_teardown_intr), DEVMETHOD(bus_child_location_str, ahci_child_location_str), DEVMETHOD(bus_get_dma_tag, ahci_get_dma_tag), DEVMETHOD_END }; static driver_t ahci_driver = { "ahci", ahci_methods, sizeof(struct ahci_controller) }; DRIVER_MODULE(ahci, pci, ahci_driver, ahci_devclass, NULL, NULL); static device_method_t ahci_ata_methods[] = { DEVMETHOD(device_probe, ahci_ata_probe), DEVMETHOD(device_attach, ahci_pci_attach), DEVMETHOD(device_detach, ahci_pci_detach), DEVMETHOD(device_suspend, ahci_pci_suspend), DEVMETHOD(device_resume, ahci_pci_resume), DEVMETHOD(bus_print_child, ahci_print_child), DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), DEVMETHOD(bus_release_resource, ahci_release_resource), DEVMETHOD(bus_setup_intr, ahci_setup_intr), DEVMETHOD(bus_teardown_intr,ahci_teardown_intr), DEVMETHOD(bus_child_location_str, ahci_child_location_str), DEVMETHOD_END }; static driver_t ahci_ata_driver = { "ahci", ahci_ata_methods, sizeof(struct ahci_controller) }; DRIVER_MODULE(ahci, atapci, ahci_ata_driver, ahci_devclass, NULL, NULL); Index: projects/clang391-import/sys/dev/ath/if_ath.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath.c (revision 309263) @@ -1,6671 +1,6707 @@ /*- 
* Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif #ifdef ATH_DEBUG_ALQ #include #endif /* * Only enable this if you're working on PS-POLL support. */ #define ATH_SW_PSQ /* * ATH_BCBUF determines the number of vap's that can transmit * beacons and also (currently) the number of vap's that can * have unique mac addresses/bssid. When staggering beacons * 4 is probably a good max as otherwise the beacons become * very closely spaced and there is limited time for cab q traffic * to go out. You can burst beacons instead but that is not good * for stations in power save and at some point you really want * another radio (and channel). * * The limit on the number of mac addresses is tied to our use of * the U/L bit and tracking addresses in a byte; it would be * worthwhile to allow more for applications like proxy sta. 
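
The U/L-bit scheme the comment alludes to derives extra BSSIDs from one hardware address by setting the locally administered bit (bit 1 of the first octet) and folding a small vap index into the address. The sketch below shows the general idea only; the exact byte and shift the driver uses are not asserted here.

#include <stdint.h>

#define ETHER_ADDR_LEN 6

static void
derive_vap_mac(uint8_t mac[ETHER_ADDR_LEN], int vap_id)
{
        mac[0] |= 0x02;                         // mark locally administered
        mac[0] ^= (uint8_t)(vap_id << 2);       // hypothetical index encoding
}
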
*/ CTASSERT(ATH_BCBUF <= 8); static struct ieee80211vap *ath_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void ath_vap_delete(struct ieee80211vap *); static int ath_init(struct ath_softc *); static void ath_stop(struct ath_softc *); static int ath_reset_vap(struct ieee80211vap *, u_long); static int ath_transmit(struct ieee80211com *, struct mbuf *); static int ath_media_change(struct ifnet *); static void ath_watchdog(void *); static void ath_parent(struct ieee80211com *); static void ath_fatal_proc(void *, int); static void ath_bmiss_vap(struct ieee80211vap *); static void ath_bmiss_proc(void *, int); static void ath_key_update_begin(struct ieee80211vap *); static void ath_key_update_end(struct ieee80211vap *); static void ath_update_mcast_hw(struct ath_softc *); static void ath_update_mcast(struct ieee80211com *); static void ath_update_promisc(struct ieee80211com *); static void ath_updateslot(struct ieee80211com *); static void ath_bstuck_proc(void *, int); static void ath_reset_proc(void *, int); static int ath_desc_alloc(struct ath_softc *); static void ath_desc_free(struct ath_softc *); static struct ieee80211_node *ath_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void ath_node_cleanup(struct ieee80211_node *); static void ath_node_free(struct ieee80211_node *); static void ath_node_getsignal(const struct ieee80211_node *, int8_t *, int8_t *); static void ath_txq_init(struct ath_softc *sc, struct ath_txq *, int); static struct ath_txq *ath_txq_setup(struct ath_softc*, int qtype, int subtype); static int ath_tx_setup(struct ath_softc *, int, int); static void ath_tx_cleanupq(struct ath_softc *, struct ath_txq *); static void ath_tx_cleanup(struct ath_softc *); static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched); static void ath_tx_proc_q0(void *, int); static void ath_tx_proc_q0123(void *, int); static void ath_tx_proc(void *, int); static void ath_txq_sched_tasklet(void *, int); static int ath_chan_set(struct ath_softc *, struct ieee80211_channel *); static void ath_chan_change(struct ath_softc *, struct ieee80211_channel *); static void ath_scan_start(struct ieee80211com *); static void ath_scan_end(struct ieee80211com *); static void ath_set_channel(struct ieee80211com *); #ifdef ATH_ENABLE_11N static void ath_update_chw(struct ieee80211com *); #endif /* ATH_ENABLE_11N */ static void ath_calibrate(void *); static int ath_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void ath_setup_stationkey(struct ieee80211_node *); static void ath_newassoc(struct ieee80211_node *, int); static int ath_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel []); static void ath_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int ath_getchannels(struct ath_softc *); static int ath_rate_setup(struct ath_softc *, u_int mode); static void ath_setcurmode(struct ath_softc *, enum ieee80211_phymode); static void ath_announce(struct ath_softc *); static void ath_dfs_tasklet(void *, int); static void ath_node_powersave(struct ieee80211_node *, int); static int ath_node_set_tim(struct ieee80211_node *, int); static void ath_node_recv_pspoll(struct ieee80211_node *, struct mbuf *); #ifdef IEEE80211_SUPPORT_TDMA #include #endif SYSCTL_DECL(_hw_ath); /* XXX validate sysctl values */ static int ath_longcalinterval = 30; /* long 
cals every 30 secs */ SYSCTL_INT(_hw_ath, OID_AUTO, longcal, CTLFLAG_RW, &ath_longcalinterval, 0, "long chip calibration interval (secs)"); static int ath_shortcalinterval = 100; /* short cals every 100 ms */ SYSCTL_INT(_hw_ath, OID_AUTO, shortcal, CTLFLAG_RW, &ath_shortcalinterval, 0, "short chip calibration interval (msecs)"); static int ath_resetcalinterval = 20*60; /* reset cal state 20 mins */ SYSCTL_INT(_hw_ath, OID_AUTO, resetcal, CTLFLAG_RW, &ath_resetcalinterval, 0, "reset chip calibration results (secs)"); static int ath_anicalinterval = 100; /* ANI calibration - 100 msec */ SYSCTL_INT(_hw_ath, OID_AUTO, anical, CTLFLAG_RW, &ath_anicalinterval, 0, "ANI calibration (msecs)"); int ath_rxbuf = ATH_RXBUF; /* # rx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, rxbuf, CTLFLAG_RWTUN, &ath_rxbuf, 0, "rx buffers allocated"); int ath_txbuf = ATH_TXBUF; /* # tx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, txbuf, CTLFLAG_RWTUN, &ath_txbuf, 0, "tx buffers allocated"); int ath_txbuf_mgmt = ATH_MGMT_TXBUF; /* # mgmt tx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, txbuf_mgmt, CTLFLAG_RWTUN, &ath_txbuf_mgmt, 0, "tx (mgmt) buffers allocated"); int ath_bstuck_threshold = 4; /* max missed beacons */ SYSCTL_INT(_hw_ath, OID_AUTO, bstuck, CTLFLAG_RW, &ath_bstuck_threshold, 0, "max missed beacon xmits before chip reset"); MALLOC_DEFINE(M_ATHDEV, "athdev", "ath driver dma buffers"); void ath_legacy_attach_comp_func(struct ath_softc *sc) { /* * Special case certain configurations. Note the * CAB queue is handled by these specially so don't * include them when checking the txq setup mask. */ switch (sc->sc_txqsetup &~ (1<sc_cabq->axq_qnum)) { case 0x01: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc_q0, sc); break; case 0x0f: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc_q0123, sc); break; default: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc, sc); break; } } /* * Set the target power mode. * * If this is called during a point in time where * the hardware is being programmed elsewhere, it will * simply store it away and update it when all current * uses of the hardware are completed. + * + * If the chip is going into network sleep or power off, then + * we will wait until all uses of the chip are done before + * going into network sleep or power off. + * + * If the chip is being programmed full-awake, then immediately + * program it full-awake so we can actually stay awake rather than + * the chip potentially going to sleep underneath us. */ void -_ath_power_setpower(struct ath_softc *sc, int power_state, const char *file, int line) +_ath_power_setpower(struct ath_softc *sc, int power_state, int selfgen, + const char *file, int line) { ATH_LOCK_ASSERT(sc); - sc->sc_target_powerstate = power_state; - - DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", + DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d, target=%d, cur=%d\n", __func__, file, line, power_state, - sc->sc_powersave_refcnt); + sc->sc_powersave_refcnt, + sc->sc_target_powerstate, + sc->sc_cur_powerstate); - if (sc->sc_powersave_refcnt == 0 && + sc->sc_target_powerstate = power_state; + + /* + * Don't program the chip into network sleep if the chip + * is being programmed elsewhere. + * + * However, if the chip is being programmed /awake/, force + * the chip awake so we stay awake. 
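
The rule this change adds can be stated as one predicate: defer sleep transitions while any reference is held, but apply an AWAKE transition immediately, and skip the register write entirely when the chip is already in the requested state. A stand-alone sketch, with the PM_* values as stand-ins for the HAL_PM_* constants:

enum pm { PM_AWAKE, PM_NETWORK_SLEEP, PM_FULL_SLEEP };

// Returns nonzero when the hardware power state should be
// reprogrammed now rather than deferred.
static int
should_program_power(int refcnt, enum pm target, enum pm cur)
{
        return ((refcnt == 0 || target == PM_AWAKE) && target != cur);
}
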
*/ + if ((sc->sc_powersave_refcnt == 0 || power_state == HAL_PM_AWAKE) && power_state != sc->sc_cur_powerstate) { sc->sc_cur_powerstate = power_state; ath_hal_setpower(sc->sc_ah, power_state); /* * If the NIC is force-awake, then set the * self-gen frame state appropriately. * * If the nic is in network sleep or full-sleep, * we let the above call leave the self-gen * state as "sleep". */ - if (sc->sc_cur_powerstate == HAL_PM_AWAKE && + if (selfgen && + sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } } } /* * Set the current self-generated frames state. * * This is separate from the target power mode. The chip may be * awake but the desired state is "sleep", so frames sent to the * destination have PWRMGT=1 in the 802.11 header. The NIC also * needs to know to set PWRMGT=1 in self-generated frames. */ void _ath_power_set_selfgen(struct ath_softc *sc, int power_state, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", __func__, file, line, power_state, sc->sc_target_selfgen_state); sc->sc_target_selfgen_state = power_state; /* * If the NIC is force-awake, then set the power state. * Network-state and full-sleep will already transition it to * mark self-gen frames as sleeping - and we can't * guarantee the NIC is awake to program the self-gen frame * setting anyway. */ if (sc->sc_cur_powerstate == HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, power_state); } } /* * Set the hardware power mode and take a reference. * * This doesn't update the target power mode in the driver; * it just updates the hardware power state. * * XXX it should only ever force the hardware awake; it should * never be called to set it asleep. */ void _ath_power_set_power_state(struct ath_softc *sc, int power_state, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", __func__, file, line, power_state, sc->sc_powersave_refcnt); sc->sc_powersave_refcnt++; + /* + * Only do the power state change if we're not programming + * it elsewhere. + */ if (power_state != sc->sc_cur_powerstate) { ath_hal_setpower(sc->sc_ah, power_state); sc->sc_cur_powerstate = power_state; - /* * Adjust the self-gen powerstate if appropriate. */ if (sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } - } } /* * Restore the power save mode to what it once was. * * This will decrement the reference counter and once it hits * zero, it'll restore the powersave state. */ void _ath_power_restore_power_state(struct ath_softc *sc, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) refcnt=%d, target state=%d\n", __func__, file, line, sc->sc_powersave_refcnt, sc->sc_target_powerstate); if (sc->sc_powersave_refcnt == 0) device_printf(sc->sc_dev, "%s: refcnt=0?\n", __func__); else sc->sc_powersave_refcnt--; if (sc->sc_powersave_refcnt == 0 && sc->sc_target_powerstate != sc->sc_cur_powerstate) { sc->sc_cur_powerstate = sc->sc_target_powerstate; ath_hal_setpower(sc->sc_ah, sc->sc_target_powerstate); } /* * Adjust the self-gen powerstate if appropriate.
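 *
 * For reference, a minimal sketch of how callers are expected to use
 * this wake/restore pair around hardware access (the same bracket
 * ath_intr() and ath_bmiss_vap() below use):
 *
 *	ATH_LOCK(sc);
 *	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 *	ATH_UNLOCK(sc);
 *
 *	... program the hardware ...
 *
 *	ATH_LOCK(sc);
 *	ath_power_restore_power_state(sc);
 *	ATH_UNLOCK(sc);
 *
 * The reference count makes nested wakeups cheap; only the final
 * restore drops the chip back to sc_target_powerstate.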
*/ if (sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } } /* * Configure the initial HAL configuration values based on bus * specific parameters. * * Some PCI IDs and other information may need tweaking. * * XXX TODO: ath9k and the Atheros HAL only program comm2g_switch_enable * if BT antenna diversity isn't enabled. * * So, let's also figure out how to enable BT diversity for AR9485. */ static void ath_setup_hal_config(struct ath_softc *sc, HAL_OPS_CONFIG *ah_config) { /* XXX TODO: only for PCI devices? */ if (sc->sc_pci_devinfo & (ATH_PCI_CUS198 | ATH_PCI_CUS230)) { ah_config->ath_hal_ext_lna_ctl_gpio = 0x200; /* bit 9 */ ah_config->ath_hal_ext_atten_margin_cfg = AH_TRUE; ah_config->ath_hal_min_gainidx = AH_TRUE; ah_config->ath_hal_ant_ctrl_comm2g_switch_enable = 0x000bbb88; /* XXX low_rssi_thresh */ /* XXX fast_div_bias */ device_printf(sc->sc_dev, "configuring for %s\n", (sc->sc_pci_devinfo & ATH_PCI_CUS198) ? "CUS198" : "CUS230"); } if (sc->sc_pci_devinfo & ATH_PCI_CUS217) device_printf(sc->sc_dev, "CUS217 card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_CUS252) device_printf(sc->sc_dev, "CUS252 card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_AR9565_1ANT) device_printf(sc->sc_dev, "WB335 1-ANT card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_AR9565_2ANT) device_printf(sc->sc_dev, "WB335 2-ANT card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_BT_ANT_DIV) device_printf(sc->sc_dev, "Bluetooth Antenna Diversity card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_KILLER) device_printf(sc->sc_dev, "Killer Wireless card detected\n"); #if 0 /* * Some WB335 cards do not support antenna diversity. Since * we use a hardcoded value for AR9565 instead of using the * EEPROM/OTP data, remove the combining feature from * the HW capabilities bitmap. */ if (sc->sc_pci_devinfo & (ATH9K_PCI_AR9565_1ANT | ATH9K_PCI_AR9565_2ANT)) { if (!(sc->sc_pci_devinfo & ATH9K_PCI_BT_ANT_DIV)) pCap->hw_caps &= ~ATH9K_HW_CAP_ANT_DIV_COMB; } if (sc->sc_pci_devinfo & ATH9K_PCI_BT_ANT_DIV) { pCap->hw_caps |= ATH9K_HW_CAP_BT_ANT_DIV; device_printf(sc->sc_dev, "Set BT/WLAN RX diversity capability\n"); } #endif if (sc->sc_pci_devinfo & ATH_PCI_D3_L1_WAR) { ah_config->ath_hal_pcie_waen = 0x0040473b; device_printf(sc->sc_dev, "Enable WAR for ASPM D3/L1\n"); } #if 0 if (sc->sc_pci_devinfo & ATH9K_PCI_NO_PLL_PWRSAVE) { ah->config.no_pll_pwrsave = true; device_printf(sc->sc_dev, "Disable PLL PowerSave\n"); } #endif } /* * Attempt to fetch the MAC address from the kernel environment. * * Returns 0, macaddr in macaddr if successful; -1 otherwise. */ static int ath_fetch_mac_kenv(struct ath_softc *sc, uint8_t *macaddr) { char devid_str[32]; int local_mac = 0; char *local_macstr; /* * Fetch from the kenv rather than using hints. * * Hints would be nice but the transition to dynamic * hints/kenv doesn't happen early enough for this * to work reliably (eg on anything embedded.) */ snprintf(devid_str, 32, "hint.%s.%d.macaddr", device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev)); if ((local_macstr = kern_getenv(devid_str)) != NULL) { uint32_t tmpmac[ETHER_ADDR_LEN]; int count; int i; /* Have a MAC address; should use it */ device_printf(sc->sc_dev, "Overriding MAC address from environment: '%s'\n", local_macstr); /* Extract out the MAC address */ count = sscanf(local_macstr, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", &tmpmac[0], &tmpmac[1], &tmpmac[2], &tmpmac[3], &tmpmac[4], &tmpmac[5]); if (count == 6) { /* Valid! 
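 * As a worked example (hypothetical address), a loader.conf entry of
 *
 *	hint.ath.0.macaddr="00:03:7f:11:22:33"
 *
 * makes the kern_getenv() lookup above return that string for the
 * devid_str "hint.ath.0.macaddr", and the sscanf() then yields all
 * six octets, i.e. count == 6.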
*/ local_mac = 1; for (i = 0; i < ETHER_ADDR_LEN; i++) macaddr[i] = tmpmac[i]; } /* Done! */ freeenv(local_macstr); local_macstr = NULL; } if (local_mac) return (0); return (-1); } #define HAL_MODE_HT20 (HAL_MODE_11NG_HT20 | HAL_MODE_11NA_HT20) #define HAL_MODE_HT40 \ (HAL_MODE_11NG_HT40PLUS | HAL_MODE_11NG_HT40MINUS | \ HAL_MODE_11NA_HT40PLUS | HAL_MODE_11NA_HT40MINUS) int ath_attach(u_int16_t devid, struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = NULL; HAL_STATUS status; int error = 0, i; u_int wmodes; int rx_chainmask, tx_chainmask; HAL_OPS_CONFIG ah_config; DPRINTF(sc, ATH_DEBUG_ANY, "%s: devid 0x%x\n", __func__, devid); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); /* * Configure the initial configuration data. * * This is stuff that may be needed early during attach * rather than done via configuration calls later. */ bzero(&ah_config, sizeof(ah_config)); ath_setup_hal_config(sc, &ah_config); ah = ath_hal_attach(devid, sc, sc->sc_st, sc->sc_sh, sc->sc_eepromdata, &ah_config, &status); if (ah == NULL) { device_printf(sc->sc_dev, "unable to attach hardware; HAL status %u\n", status); error = ENXIO; goto bad; } sc->sc_ah = ah; sc->sc_invalid = 0; /* ready to go, enable interrupt handling */ #ifdef ATH_DEBUG sc->sc_debug = ath_debug; #endif /* * Setup the DMA/EDMA functions based on the current * hardware support. * * This is required before the descriptors are allocated. */ if (ath_hal_hasedma(sc->sc_ah)) { sc->sc_isedma = 1; ath_recv_setup_edma(sc); ath_xmit_setup_edma(sc); } else { ath_recv_setup_legacy(sc); ath_xmit_setup_legacy(sc); } if (ath_hal_hasmybeacon(sc->sc_ah)) { sc->sc_do_mybeacon = 1; } /* * Check if the MAC has multi-rate retry support. * We do this by trying to setup a fake extended * descriptor. MAC's that don't have support will * return false w/o doing anything. MAC's that do * support it will return true w/o doing anything. */ sc->sc_mrretry = ath_hal_setupxtxdesc(ah, NULL, 0,0, 0,0, 0,0); /* * Check if the device has hardware counters for PHY * errors. If so we need to enable the MIB interrupt * so we can act on stat triggers. */ if (ath_hal_hwphycounters(ah)) sc->sc_needmib = 1; /* * Get the hardware key cache size. */ sc->sc_keymax = ath_hal_keycachesize(ah); if (sc->sc_keymax > ATH_KEYMAX) { device_printf(sc->sc_dev, "Warning, using only %u of %u key cache slots\n", ATH_KEYMAX, sc->sc_keymax); sc->sc_keymax = ATH_KEYMAX; } /* * Reset the key cache since some parts do not * reset the contents on initial power up. */ for (i = 0; i < sc->sc_keymax; i++) ath_hal_keyreset(ah, i); /* * Collect the default channel list. */ error = ath_getchannels(sc); if (error != 0) goto bad; /* * Setup rate tables for all potential media types. */ ath_rate_setup(sc, IEEE80211_MODE_11A); ath_rate_setup(sc, IEEE80211_MODE_11B); ath_rate_setup(sc, IEEE80211_MODE_11G); ath_rate_setup(sc, IEEE80211_MODE_TURBO_A); ath_rate_setup(sc, IEEE80211_MODE_TURBO_G); ath_rate_setup(sc, IEEE80211_MODE_STURBO_A); ath_rate_setup(sc, IEEE80211_MODE_11NA); ath_rate_setup(sc, IEEE80211_MODE_11NG); ath_rate_setup(sc, IEEE80211_MODE_HALF); ath_rate_setup(sc, IEEE80211_MODE_QUARTER); /* NB: setup here so ath_rate_update is happy */ ath_setcurmode(sc, IEEE80211_MODE_11A); /* * Allocate TX descriptors and populate the lists. 
*/ error = ath_desc_alloc(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate TX descriptors: %d\n", error); goto bad; } error = ath_txdma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate TX descriptors: %d\n", error); goto bad; } /* * Allocate RX descriptors and populate the lists. */ error = ath_rxdma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate RX descriptors: %d\n", error); goto bad; } callout_init_mtx(&sc->sc_cal_ch, &sc->sc_mtx, 0); callout_init_mtx(&sc->sc_wd_ch, &sc->sc_mtx, 0); ATH_TXBUF_LOCK_INIT(sc); sc->sc_tq = taskqueue_create("ath_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->sc_tq); taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); TASK_INIT(&sc->sc_rxtask, 0, sc->sc_rx.recv_tasklet, sc); TASK_INIT(&sc->sc_bmisstask, 0, ath_bmiss_proc, sc); TASK_INIT(&sc->sc_bstucktask,0, ath_bstuck_proc, sc); TASK_INIT(&sc->sc_resettask,0, ath_reset_proc, sc); TASK_INIT(&sc->sc_txqtask, 0, ath_txq_sched_tasklet, sc); TASK_INIT(&sc->sc_fataltask, 0, ath_fatal_proc, sc); /* * Allocate hardware transmit queues: one queue for * beacon frames and one data queue for each QoS * priority. Note that the hal handles resetting * these queues at the needed time. * * XXX PS-Poll */ sc->sc_bhalq = ath_beaconq_setup(sc); if (sc->sc_bhalq == (u_int) -1) { device_printf(sc->sc_dev, "unable to setup a beacon xmit queue!\n"); error = EIO; goto bad2; } sc->sc_cabq = ath_txq_setup(sc, HAL_TX_QUEUE_CAB, 0); if (sc->sc_cabq == NULL) { device_printf(sc->sc_dev, "unable to setup CAB xmit queue!\n"); error = EIO; goto bad2; } /* NB: insure BK queue is the lowest priority h/w queue */ if (!ath_tx_setup(sc, WME_AC_BK, HAL_WME_AC_BK)) { device_printf(sc->sc_dev, "unable to setup xmit queue for %s traffic!\n", ieee80211_wme_acnames[WME_AC_BK]); error = EIO; goto bad2; } if (!ath_tx_setup(sc, WME_AC_BE, HAL_WME_AC_BE) || !ath_tx_setup(sc, WME_AC_VI, HAL_WME_AC_VI) || !ath_tx_setup(sc, WME_AC_VO, HAL_WME_AC_VO)) { /* * Not enough hardware tx queues to properly do WME; * just punt and assign them all to the same h/w queue. * We could do a better job of this if, for example, * we allocate queues when we switch from station to * AP mode. */ if (sc->sc_ac2q[WME_AC_VI] != NULL) ath_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_VI]); if (sc->sc_ac2q[WME_AC_BE] != NULL) ath_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_BE]); sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VI] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VO] = sc->sc_ac2q[WME_AC_BK]; } /* * Attach the TX completion function. * * The non-EDMA chips may have some special case optimisations; * this method gives everyone a chance to attach cleanly. */ sc->sc_tx.xmit_attach_comp_func(sc); /* * Setup rate control. Some rate control modules * call back to change the antenna state so expose * the necessary entry points. * XXX maybe belongs in struct ath_ratectrl? */ sc->sc_setdefantenna = ath_setdefantenna; sc->sc_rc = ath_rate_attach(sc); if (sc->sc_rc == NULL) { error = EIO; goto bad2; } /* Attach DFS module */ if (!
ath_dfs_attach(sc)) { device_printf(sc->sc_dev, "%s: unable to attach DFS\n", __func__); error = EIO; goto bad2; } /* Attach spectral module */ if (ath_spectral_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach spectral\n", __func__); error = EIO; goto bad2; } /* Attach bluetooth coexistence module */ if (ath_btcoex_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach bluetooth coexistence\n", __func__); error = EIO; goto bad2; } /* Attach LNA diversity module */ if (ath_lna_div_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach LNA diversity\n", __func__); error = EIO; goto bad2; } /* Start DFS processing tasklet */ TASK_INIT(&sc->sc_dfstask, 0, ath_dfs_tasklet, sc); /* Configure LED state */ sc->sc_blinking = 0; sc->sc_ledstate = 1; sc->sc_ledon = 0; /* low true */ sc->sc_ledidle = (2700*hz)/1000; /* 2.7sec */ callout_init(&sc->sc_ledtimer, 1); /* * Don't setup hardware-based blinking. * * Although some NICs may have this configured in the * default reset register values, the user may wish * to alter which pins have which function. * * The reference driver attaches the MAC network LED to GPIO1 and * the MAC power LED to GPIO2. However, the DWA-552 cardbus * NIC has these reversed. */ sc->sc_hardled = (1 == 0); sc->sc_led_net_pin = -1; sc->sc_led_pwr_pin = -1; /* * Auto-enable soft led processing for IBM cards and for * 5211 minipci cards. Users can also manually enable/disable * support with a sysctl. */ sc->sc_softled = (devid == AR5212_DEVID_IBM || devid == AR5211_DEVID); ath_led_config(sc); ath_hal_setledstate(ah, HAL_LED_INIT); /* XXX not right but it's not used anywhere important */ ic->ic_phytype = IEEE80211_T_OFDM; ic->ic_opmode = IEEE80211_M_STA; ic->ic_caps = IEEE80211_C_STA /* station mode */ | IEEE80211_C_IBSS /* ibss, nee adhoc, mode */ | IEEE80211_C_HOSTAP /* hostap mode */ | IEEE80211_C_MONITOR /* monitor mode */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ | IEEE80211_C_WDS /* 4-address traffic works */ | IEEE80211_C_MBSS /* mesh point link mode */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WPA /* capable of WPA1+WPA2 */ #ifndef ATH_ENABLE_11N | IEEE80211_C_BGSCAN /* capable of bg scanning */ #endif | IEEE80211_C_TXFRAG /* handle tx frags */ #ifdef ATH_ENABLE_DFS | IEEE80211_C_DFS /* Enable radar detection */ #endif | IEEE80211_C_PMGT /* Station side power mgmt */ | IEEE80211_C_SWSLEEP ; /* * Query the hal to figure out h/w crypto support. */ if (ath_hal_ciphersupported(ah, HAL_CIPHER_WEP)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP; if (ath_hal_ciphersupported(ah, HAL_CIPHER_AES_OCB)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_AES_OCB; if (ath_hal_ciphersupported(ah, HAL_CIPHER_AES_CCM)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_AES_CCM; if (ath_hal_ciphersupported(ah, HAL_CIPHER_CKIP)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_CKIP; if (ath_hal_ciphersupported(ah, HAL_CIPHER_TKIP)) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIP; /* * Check if h/w does the MIC and/or whether the * separate key cache entries are required to * handle both tx+rx MIC keys. */ if (ath_hal_ciphersupported(ah, HAL_CIPHER_MIC)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIPMIC; /* * If the h/w supports storing tx+rx MIC keys * in one cache slot automatically enable use. 
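 *
 * As a concrete illustration of the slot accounting: with split MIC
 * in use, global key 0 occupies key cache slots 0 and 64 plus the
 * MIC slots 32 and 96, which is why the keymap setup below reserves
 * i, i+32, i+64 and i+32+64 for each of the IEEE80211_WEP_NKID keys.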
*/ if (ath_hal_hastkipsplit(ah) || !ath_hal_settkipsplit(ah, AH_FALSE)) sc->sc_splitmic = 1; /* * If the h/w can do TKIP MIC together with WME then * we use it; otherwise we force the MIC to be done * in software by the net80211 layer. */ if (ath_hal_haswmetkipmic(ah)) sc->sc_wmetkipmic = 1; } sc->sc_hasclrkey = ath_hal_ciphersupported(ah, HAL_CIPHER_CLR); /* * Check for multicast key search support. */ if (ath_hal_hasmcastkeysearch(sc->sc_ah) && !ath_hal_getmcastkeysearch(sc->sc_ah)) { ath_hal_setmcastkeysearch(sc->sc_ah, 1); } sc->sc_mcastkey = ath_hal_getmcastkeysearch(ah); /* * Mark key cache slots associated with global keys * as in use. If we knew TKIP was not to be used we * could leave the +32, +64, and +32+64 slots free. */ for (i = 0; i < IEEE80211_WEP_NKID; i++) { setbit(sc->sc_keymap, i); setbit(sc->sc_keymap, i+64); if (sc->sc_splitmic) { setbit(sc->sc_keymap, i+32); setbit(sc->sc_keymap, i+32+64); } } /* * TPC support can be done either with a global cap or * per-packet support. The latter is not available on * all parts. We're a bit pedantic here as all parts * support a global cap. */ if (ath_hal_hastpc(ah) || ath_hal_hastxpowlimit(ah)) ic->ic_caps |= IEEE80211_C_TXPMGT; /* * Mark WME capability only if we have sufficient * hardware queues to do proper priority scheduling. */ if (sc->sc_ac2q[WME_AC_BE] != sc->sc_ac2q[WME_AC_BK]) ic->ic_caps |= IEEE80211_C_WME; /* * Check for misc other capabilities. */ if (ath_hal_hasbursting(ah)) ic->ic_caps |= IEEE80211_C_BURST; sc->sc_hasbmask = ath_hal_hasbssidmask(ah); sc->sc_hasbmatch = ath_hal_hasbssidmatch(ah); sc->sc_hastsfadd = ath_hal_hastsfadjust(ah); sc->sc_rxslink = ath_hal_self_linked_final_rxdesc(ah); /* XXX TODO: just make this a "store tx/rx timestamp length" operation */ if (ath_hal_get_rx_tsf_prec(ah, &i)) { if (i == 32) { sc->sc_rxtsf32 = 1; } if (bootverbose) device_printf(sc->sc_dev, "RX timestamp: %d bits\n", i); } if (ath_hal_get_tx_tsf_prec(ah, &i)) { if (bootverbose) device_printf(sc->sc_dev, "TX timestamp: %d bits\n", i); } sc->sc_hasenforcetxop = ath_hal_hasenforcetxop(ah); sc->sc_rx_lnamixer = ath_hal_hasrxlnamixer(ah); sc->sc_hasdivcomb = ath_hal_hasdivantcomb(ah); if (ath_hal_hasfastframes(ah)) ic->ic_caps |= IEEE80211_C_FF; wmodes = ath_hal_getwirelessmodes(ah); if (wmodes & (HAL_MODE_108G|HAL_MODE_TURBO)) ic->ic_caps |= IEEE80211_C_TURBOP; #ifdef IEEE80211_SUPPORT_TDMA if (ath_hal_macversion(ah) > 0x78) { ic->ic_caps |= IEEE80211_C_TDMA; /* capable of TDMA */ ic->ic_tdma_update = ath_tdma_update; } #endif /* * TODO: enforce that at least this many frames are available * in the txbuf list before allowing data frames (raw or * otherwise) to be transmitted. */ sc->sc_txq_data_minfree = 10; /* * Leave this as default to maintain legacy behaviour. * Shortening the cabq/mcastq may end up causing some * undesirable behaviour. */ sc->sc_txq_mcastq_maxdepth = ath_txbuf; /* * How deep can the node software TX queue get whilst it's asleep. */ sc->sc_txq_node_psq_maxdepth = 16; /* * Default the maximum queue depth for a given node * to 1/4'th the TX buffers, or 64, whichever * is larger. */ sc->sc_txq_node_maxdepth = MAX(64, ath_txbuf / 4); /* Enable CABQ by default */ sc->sc_cabq_enable = 1; /* * Allow the TX and RX chainmasks to be overridden by * environment variables and/or device.hints. * * This must be done early - before the hardware is * calibrated or before the 802.11n stream calculation * is done. 
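 *
 * For example (hypothetical values), a /boot/device.hints entry of
 *
 *	hint.ath.0.rx_chainmask="0x3"
 *	hint.ath.0.tx_chainmask="0x3"
 *
 * is picked up by the resource_int_value() calls below and pushed
 * into the HAL before the stream counts are derived.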
*/ if (resource_int_value(device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev), "rx_chainmask", &rx_chainmask) == 0) { device_printf(sc->sc_dev, "Setting RX chainmask to 0x%x\n", rx_chainmask); (void) ath_hal_setrxchainmask(sc->sc_ah, rx_chainmask); } if (resource_int_value(device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev), "tx_chainmask", &tx_chainmask) == 0) { device_printf(sc->sc_dev, "Setting TX chainmask to 0x%x\n", tx_chainmask); (void) ath_hal_settxchainmask(sc->sc_ah, tx_chainmask); } /* * Query the TX/RX chainmask configuration. * * This is only relevant for 11n devices. */ ath_hal_getrxchainmask(ah, &sc->sc_rxchainmask); ath_hal_gettxchainmask(ah, &sc->sc_txchainmask); /* * Disable MRR with protected frames by default. * Only 802.11n series NICs can handle this. */ sc->sc_mrrprot = 0; /* XXX should be a capability */ /* * Query the enterprise mode information from the HAL. */ if (ath_hal_getcapability(ah, HAL_CAP_ENTERPRISE_MODE, 0, &sc->sc_ent_cfg) == HAL_OK) sc->sc_use_ent = 1; #ifdef ATH_ENABLE_11N /* * Query HT capabilities */ if (ath_hal_getcapability(ah, HAL_CAP_HT, 0, NULL) == HAL_OK && (wmodes & (HAL_MODE_HT20 | HAL_MODE_HT40))) { uint32_t rxs, txs; uint32_t ldpc; device_printf(sc->sc_dev, "[HT] enabling HT modes\n"); sc->sc_mrrprot = 1; /* XXX should be a capability */ ic->ic_htcaps = IEEE80211_HTC_HT /* HT operation */ | IEEE80211_HTC_AMPDU /* A-MPDU tx/rx */ | IEEE80211_HTC_AMSDU /* A-MSDU tx/rx */ | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ | IEEE80211_HTCAP_SMPS_OFF; /* SM power save off */ /* * Enable short-GI for HT20 only if the hardware * advertises support. * Notably, anything earlier than the AR9287 doesn't. */ if ((ath_hal_getcapability(ah, HAL_CAP_HT20_SGI, 0, NULL) == HAL_OK) && (wmodes & HAL_MODE_HT20)) { device_printf(sc->sc_dev, "[HT] enabling short-GI in 20MHz mode\n"); ic->ic_htcaps |= IEEE80211_HTCAP_SHORTGI20; } if (wmodes & HAL_MODE_HT40) ic->ic_htcaps |= IEEE80211_HTCAP_CHWIDTH40 | IEEE80211_HTCAP_SHORTGI40; /* * TX/RX streams need to be taken into account when * negotiating which MCS rates it'll receive and * what MCS rates are available for TX. */ (void) ath_hal_getcapability(ah, HAL_CAP_STREAMS, 0, &txs); (void) ath_hal_getcapability(ah, HAL_CAP_STREAMS, 1, &rxs); ic->ic_txstream = txs; ic->ic_rxstream = rxs; /* * Setup TX and RX STBC based on what the HAL allows and * the currently configured chainmask set. * Ie - don't enable STBC TX if only one chain is enabled. * STBC RX is fine on a single RX chain; it just won't * provide any real benefit. */ if (ath_hal_getcapability(ah, HAL_CAP_RX_STBC, 0, NULL) == HAL_OK) { sc->sc_rx_stbc = 1; device_printf(sc->sc_dev, "[HT] 1 stream STBC receive enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_RXSTBC_1STREAM; } if (txs > 1 && ath_hal_getcapability(ah, HAL_CAP_TX_STBC, 0, NULL) == HAL_OK) { sc->sc_tx_stbc = 1; device_printf(sc->sc_dev, "[HT] 1 stream STBC transmit enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_TXSTBC; } (void) ath_hal_getcapability(ah, HAL_CAP_RTS_AGGR_LIMIT, 1, &sc->sc_rts_aggr_limit); if (sc->sc_rts_aggr_limit != (64 * 1024)) device_printf(sc->sc_dev, "[HT] RTS aggregates limited to %d KiB\n", sc->sc_rts_aggr_limit / 1024); /* * LDPC */ if ((ath_hal_getcapability(ah, HAL_CAP_LDPC, 0, &ldpc)) == HAL_OK && (ldpc == 1)) { sc->sc_has_ldpc = 1; device_printf(sc->sc_dev, "[HT] LDPC transmit/receive enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_LDPC; } device_printf(sc->sc_dev, "[HT] %d RX streams; %d TX streams\n", rxs, txs); } #endif /* * Initial aggregation settings.
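 *
 * As a worked example of the RTS aggregate limit queried above: a
 * (hypothetical) chip reporting HAL_CAP_RTS_AGGR_LIMIT of 8192 would
 * print "[HT] RTS aggregates limited to 8 KiB" and cap RTS-protected
 * aggregates at 8192 bytes instead of the default 64 KiB.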
*/ sc->sc_hwq_limit_aggr = ATH_AGGR_MIN_QDEPTH; sc->sc_hwq_limit_nonaggr = ATH_NONAGGR_MIN_QDEPTH; sc->sc_tid_hwq_lo = ATH_AGGR_SCHED_LOW; sc->sc_tid_hwq_hi = ATH_AGGR_SCHED_HIGH; sc->sc_aggr_limit = ATH_AGGR_MAXSIZE; sc->sc_delim_min_pad = 0; /* * Check if the hardware requires PCI register serialisation. * Some of the Owl based MACs require this. */ if (mp_ncpus > 1 && ath_hal_getcapability(ah, HAL_CAP_SERIALISE_WAR, 0, NULL) == HAL_OK) { sc->sc_ah->ah_config.ah_serialise_reg_war = 1; device_printf(sc->sc_dev, "Enabling register serialisation\n"); } /* * Initialise the deferred completed RX buffer list. */ TAILQ_INIT(&sc->sc_rx_rxlist[HAL_RX_QUEUE_HP]); TAILQ_INIT(&sc->sc_rx_rxlist[HAL_RX_QUEUE_LP]); /* * Indicate we need the 802.11 header padded to a * 32-bit boundary for 4-address and QoS frames. */ ic->ic_flags |= IEEE80211_F_DATAPAD; /* * Query the hal about antenna support. */ sc->sc_defant = ath_hal_getdefantenna(ah); /* * Not all chips have the VEOL support we want to * use with IBSS beacons; check here for it. */ sc->sc_hasveol = ath_hal_hasveol(ah); /* get mac address from kenv first, then hardware */ if (ath_fetch_mac_kenv(sc, ic->ic_macaddr) == 0) { /* Tell the HAL now about the new MAC */ ath_hal_setmac(ah, ic->ic_macaddr); } else { ath_hal_getmac(ah, ic->ic_macaddr); } if (sc->sc_hasbmask) ath_hal_getbssidmask(ah, sc->sc_hwbssidmask); /* NB: used to size node table key mapping array */ ic->ic_max_keyix = sc->sc_keymax; /* call MI attach routine. */ ieee80211_ifattach(ic); ic->ic_setregdomain = ath_setregdomain; ic->ic_getradiocaps = ath_getradiocaps; sc->sc_opmode = HAL_M_STA; /* override default methods */ ic->ic_ioctl = ath_ioctl; ic->ic_parent = ath_parent; ic->ic_transmit = ath_transmit; ic->ic_newassoc = ath_newassoc; ic->ic_updateslot = ath_updateslot; ic->ic_wme.wme_update = ath_wme_update; ic->ic_vap_create = ath_vap_create; ic->ic_vap_delete = ath_vap_delete; ic->ic_raw_xmit = ath_raw_xmit; ic->ic_update_mcast = ath_update_mcast; ic->ic_update_promisc = ath_update_promisc; ic->ic_node_alloc = ath_node_alloc; sc->sc_node_free = ic->ic_node_free; ic->ic_node_free = ath_node_free; sc->sc_node_cleanup = ic->ic_node_cleanup; ic->ic_node_cleanup = ath_node_cleanup; ic->ic_node_getsignal = ath_node_getsignal; ic->ic_scan_start = ath_scan_start; ic->ic_scan_end = ath_scan_end; ic->ic_set_channel = ath_set_channel; #ifdef ATH_ENABLE_11N /* 802.11n specific - but just override anyway */ sc->sc_addba_request = ic->ic_addba_request; sc->sc_addba_response = ic->ic_addba_response; sc->sc_addba_stop = ic->ic_addba_stop; sc->sc_bar_response = ic->ic_bar_response; sc->sc_addba_response_timeout = ic->ic_addba_response_timeout; ic->ic_addba_request = ath_addba_request; ic->ic_addba_response = ath_addba_response; ic->ic_addba_response_timeout = ath_addba_response_timeout; ic->ic_addba_stop = ath_addba_stop; ic->ic_bar_response = ath_bar_response; ic->ic_update_chw = ath_update_chw; #endif /* ATH_ENABLE_11N */ #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * There's one vendor bitmap entry in the RX radiotap * header; make sure that's taken into account. */ ieee80211_radiotap_attachv(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), 0, ATH_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), 1, ATH_RX_RADIOTAP_PRESENT); #else /* * No vendor bitmap/extensions are present. 
*/ ieee80211_radiotap_attach(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), ATH_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), ATH_RX_RADIOTAP_PRESENT); #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ /* * Setup the ALQ logging if required */ #ifdef ATH_DEBUG_ALQ if_ath_alq_init(&sc->sc_alq, device_get_nameunit(sc->sc_dev)); if_ath_alq_setcfg(&sc->sc_alq, sc->sc_ah->ah_macVersion, sc->sc_ah->ah_macRev, sc->sc_ah->ah_phyRev, sc->sc_ah->ah_magic); #endif /* * Setup dynamic sysctl's now that country code and * regdomain are available from the hal. */ ath_sysctlattach(sc); ath_sysctl_stats_attach(sc); ath_sysctl_hal_attach(sc); if (bootverbose) ieee80211_announce(ic); ath_announce(sc); /* * Put it to sleep for now. */ ATH_LOCK(sc); - ath_power_setpower(sc, HAL_PM_FULL_SLEEP); + ath_power_setpower(sc, HAL_PM_FULL_SLEEP, 1); ATH_UNLOCK(sc); return 0; bad2: ath_tx_cleanup(sc); ath_desc_free(sc); ath_txdma_teardown(sc); ath_rxdma_teardown(sc); bad: if (ah) ath_hal_detach(ah); sc->sc_invalid = 1; return error; } int ath_detach(struct ath_softc *sc) { /* * NB: the order of these is important: * o stop the chip so no more interrupts will fire * o call the 802.11 layer before detaching the hal to * insure callbacks into the driver to delete global * key cache entries can be handled * o free the taskqueue which drains any pending tasks * o reclaim the tx queue data structures after calling * the 802.11 layer as we'll get called back to reclaim * node state and potentially want to use them * o to cleanup the tx queues the hal is called, so detach * it last * Other than that, it's straightforward... */ /* * XXX Wake the hardware up first. ath_stop() will still * wake it up first, but I'd rather do it here just to * ensure it's awake. */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); /* * Stop things cleanly. */ ath_stop(sc); ATH_UNLOCK(sc); ieee80211_ifdetach(&sc->sc_ic); taskqueue_free(sc->sc_tq); #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->detach(sc->sc_tx99); #endif ath_rate_detach(sc->sc_rc); #ifdef ATH_DEBUG_ALQ if_ath_alq_tidyup(&sc->sc_alq); #endif ath_lna_div_detach(sc); ath_btcoex_detach(sc); ath_spectral_detach(sc); ath_dfs_detach(sc); ath_desc_free(sc); ath_txdma_teardown(sc); ath_rxdma_teardown(sc); ath_tx_cleanup(sc); ath_hal_detach(sc->sc_ah); /* NB: sets chip in full sleep */ return 0; } /* * MAC address handling for multiple BSS on the same radio. * The first vap uses the MAC address from the EEPROM. For * subsequent vap's we set the U/L bit (bit 1) in the MAC * address and use the next six bits as an index. */ static void assign_address(struct ath_softc *sc, uint8_t mac[IEEE80211_ADDR_LEN], int clone) { int i; if (clone && sc->sc_hasbmask) { /* NB: we only do this if h/w supports multiple bssid */ for (i = 0; i < 8; i++) if ((sc->sc_bssidmask & (1<<i)) == 0) break; mac[0] |= (i << 2)|0x2; } else i = 0; sc->sc_bssidmask |= 1<<i; sc->sc_hwbssidmask[0] &= ~mac[0]; if (i == 0) sc->sc_nbssid0++; } static void reclaim_address(struct ath_softc *sc, const uint8_t mac[IEEE80211_ADDR_LEN]) { int i = mac[0] >> 2; uint8_t mask; if (i != 0 || --sc->sc_nbssid0 == 0) { sc->sc_bssidmask &= ~(1<<i); mask = 0xff; for (i = 1; i < 8; i++) if (sc->sc_bssidmask & (1<<i)) mask &= ~((i<<2)|0x2); sc->sc_hwbssidmask[0] |= mask; } } /* * Assign a beacon xmit slot. We try to space out * assignments so when beacons are staggered the * traffic coming out of the cab q has maximal time * to go out before the next beacon is scheduled.
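 *
 * Worked example, assuming ATH_BCBUF is 4: if only slot 0 is taken,
 * the search below settles on slot 2, since both of its neighbours
 * ((2+1)%4 and (2-1)%4) are free; that spreads the staggered beacons
 * as far apart as possible and gives the CAB queue the most time to
 * drain.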
*/ static int assign_bslot(struct ath_softc *sc) { u_int slot, free; free = 0; for (slot = 0; slot < ATH_BCBUF; slot++) if (sc->sc_bslot[slot] == NULL) { if (sc->sc_bslot[(slot+1)%ATH_BCBUF] == NULL && sc->sc_bslot[(slot-1)%ATH_BCBUF] == NULL) return slot; free = slot; /* NB: keep looking for a double slot */ } return free; } static struct ieee80211vap * ath_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac0[IEEE80211_ADDR_LEN]) { struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp; struct ieee80211vap *vap; uint8_t mac[IEEE80211_ADDR_LEN]; int needbeacon, error; enum ieee80211_opmode ic_opmode; avp = malloc(sizeof(struct ath_vap), M_80211_VAP, M_WAITOK | M_ZERO); needbeacon = 0; IEEE80211_ADDR_COPY(mac, mac0); ATH_LOCK(sc); ic_opmode = opmode; /* default to opmode of new vap */ switch (opmode) { case IEEE80211_M_STA: if (sc->sc_nstavaps != 0) { /* XXX only 1 for now */ device_printf(sc->sc_dev, "only 1 sta vap supported\n"); goto bad; } if (sc->sc_nvaps) { /* * With multiple vaps we must fall back * to s/w beacon miss handling. */ flags |= IEEE80211_CLONE_NOBEACONS; } if (flags & IEEE80211_CLONE_NOBEACONS) { /* * Station mode w/o beacons are implemented w/ AP mode. */ ic_opmode = IEEE80211_M_HOSTAP; } break; case IEEE80211_M_IBSS: if (sc->sc_nvaps != 0) { /* XXX only 1 for now */ device_printf(sc->sc_dev, "only 1 ibss vap supported\n"); goto bad; } needbeacon = 1; break; case IEEE80211_M_AHDEMO: #ifdef IEEE80211_SUPPORT_TDMA if (flags & IEEE80211_CLONE_TDMA) { if (sc->sc_nvaps != 0) { device_printf(sc->sc_dev, "only 1 tdma vap supported\n"); goto bad; } needbeacon = 1; flags |= IEEE80211_CLONE_NOBEACONS; } /* fall thru... */ #endif case IEEE80211_M_MONITOR: if (sc->sc_nvaps != 0 && ic->ic_opmode != opmode) { /* * Adopt existing mode. Adding a monitor or ahdemo * vap to an existing configuration is of dubious * value but should be ok. */ /* XXX not right for monitor mode */ ic_opmode = ic->ic_opmode; } break; case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: needbeacon = 1; break; case IEEE80211_M_WDS: if (sc->sc_nvaps != 0 && ic->ic_opmode == IEEE80211_M_STA) { device_printf(sc->sc_dev, "wds not supported in sta mode\n"); goto bad; } /* * Silently remove any request for a unique * bssid; WDS vap's always share the local * mac address. */ flags &= ~IEEE80211_CLONE_BSSID; if (sc->sc_nvaps == 0) ic_opmode = IEEE80211_M_HOSTAP; else ic_opmode = ic->ic_opmode; break; default: device_printf(sc->sc_dev, "unknown opmode %d\n", opmode); goto bad; } /* * Check that a beacon buffer is available; the code below assumes it. */ if (needbeacon & TAILQ_EMPTY(&sc->sc_bbuf)) { device_printf(sc->sc_dev, "no beacon buffer available\n"); goto bad; } /* STA, AHDEMO? 
*/ if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) { assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID); ath_hal_setbssidmask(sc->sc_ah, sc->sc_hwbssidmask); } vap = &avp->av_vap; /* XXX can't hold mutex across if_alloc */ ATH_UNLOCK(sc); error = ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); ATH_LOCK(sc); if (error != 0) { device_printf(sc->sc_dev, "%s: error %d creating vap\n", __func__, error); goto bad2; } /* h/w crypto support */ vap->iv_key_alloc = ath_key_alloc; vap->iv_key_delete = ath_key_delete; vap->iv_key_set = ath_key_set; vap->iv_key_update_begin = ath_key_update_begin; vap->iv_key_update_end = ath_key_update_end; /* override various methods */ avp->av_recv_mgmt = vap->iv_recv_mgmt; vap->iv_recv_mgmt = ath_recv_mgmt; vap->iv_reset = ath_reset_vap; vap->iv_update_beacon = ath_beacon_update; avp->av_newstate = vap->iv_newstate; vap->iv_newstate = ath_newstate; avp->av_bmiss = vap->iv_bmiss; vap->iv_bmiss = ath_bmiss_vap; avp->av_node_ps = vap->iv_node_ps; vap->iv_node_ps = ath_node_powersave; avp->av_set_tim = vap->iv_set_tim; vap->iv_set_tim = ath_node_set_tim; avp->av_recv_pspoll = vap->iv_recv_pspoll; vap->iv_recv_pspoll = ath_node_recv_pspoll; /* Set default parameters */ /* * Anything earlier than some AR9300 series MACs doesn't * support a smaller MPDU density. */ vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_8; /* * All NICs can handle the maximum size, however * AR5416 based MACs can only TX aggregates w/ RTS * protection when the total aggregate size is <= 8k. * However, for now that's enforced by the TX path. */ vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K; avp->av_bslot = -1; if (needbeacon) { /* * Allocate beacon state and setup the q for buffered * multicast frames. We know a beacon buffer is * available because we checked above. */ avp->av_bcbuf = TAILQ_FIRST(&sc->sc_bbuf); TAILQ_REMOVE(&sc->sc_bbuf, avp->av_bcbuf, bf_list); if (opmode != IEEE80211_M_IBSS || !sc->sc_hasveol) { /* * Assign the vap to a beacon xmit slot. As above * this cannot fail to find a free one. */ avp->av_bslot = assign_bslot(sc); KASSERT(sc->sc_bslot[avp->av_bslot] == NULL, ("beacon slot %u not empty", avp->av_bslot)); sc->sc_bslot[avp->av_bslot] = vap; sc->sc_nbcnvaps++; } if (sc->sc_hastsfadd && sc->sc_nbcnvaps > 0) { /* * Multiple vaps are to transmit beacons and we * have h/w support for TSF adjusting; enable * use of staggered beacons. */ sc->sc_stagbeacons = 1; } ath_txq_init(sc, &avp->av_mcastq, ATH_TXQ_SWQ); } ic->ic_opmode = ic_opmode; if (opmode != IEEE80211_M_WDS) { sc->sc_nvaps++; if (opmode == IEEE80211_M_STA) sc->sc_nstavaps++; if (opmode == IEEE80211_M_MBSS) sc->sc_nmeshvaps++; } switch (ic_opmode) { case IEEE80211_M_IBSS: sc->sc_opmode = HAL_M_IBSS; break; case IEEE80211_M_STA: sc->sc_opmode = HAL_M_STA; break; case IEEE80211_M_AHDEMO: #ifdef IEEE80211_SUPPORT_TDMA if (vap->iv_caps & IEEE80211_C_TDMA) { sc->sc_tdma = 1; /* NB: disable tsf adjust */ sc->sc_stagbeacons = 0; } /* * NB: adhoc demo mode is a pseudo mode; to the hal it's * just ap mode. */ /* fall thru... */ #endif case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: sc->sc_opmode = HAL_M_HOSTAP; break; case IEEE80211_M_MONITOR: sc->sc_opmode = HAL_M_MONITOR; break; default: /* XXX should not happen */ break; } if (sc->sc_hastsfadd) { /* * Configure whether or not TSF adjust should be done. */ ath_hal_settsfadjust(sc->sc_ah, sc->sc_stagbeacons); } if (flags & IEEE80211_CLONE_NOBEACONS) { /* * Enable s/w beacon miss handling.
*/ sc->sc_swbmiss = 1; } ATH_UNLOCK(sc); /* complete setup */ ieee80211_vap_attach(vap, ath_media_change, ieee80211_media_status, mac); return vap; bad2: reclaim_address(sc, mac); ath_hal_setbssidmask(sc->sc_ah, sc->sc_hwbssidmask); bad: free(avp, M_80211_VAP); ATH_UNLOCK(sc); return NULL; } static void ath_vap_delete(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; struct ath_vap *avp = ATH_VAP(vap); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__); if (sc->sc_running) { /* * Quiesce the hardware while we remove the vap. In * particular we need to reclaim all references to * the vap state by any frames pending on the tx queues. */ ath_hal_intrset(ah, 0); /* disable interrupts */ /* XXX Do all frames from all vaps/nodes need draining here? */ ath_stoprecv(sc, 1); /* stop recv side */ ath_draintxq(sc, ATH_RESET_DEFAULT); /* stop hw xmit side */ } /* .. leave the hardware awake for now. */ ieee80211_vap_detach(vap); /* * XXX Danger Will Robinson! Danger! * * Because ieee80211_vap_detach() can queue a frame (the station * disassociate message?) after we've drained the TXQ and * flushed the software TXQ, we will end up with a frame queued * to a node whose vap is about to be freed. * * To work around this, flush the hardware/software again. * This may be racy - the ath task may be running and the packet * may be being scheduled between sw->hw txq. Tsk. * * TODO: figure out why a new node gets allocated somewhere around * here (after the ath_tx_swq() call; and after an ath_stop() * call!) */ ath_draintxq(sc, ATH_RESET_DEFAULT); ATH_LOCK(sc); /* * Reclaim beacon state. Note this must be done before * the vap instance is reclaimed as we may have a reference * to it in the buffer for the beacon frame. */ if (avp->av_bcbuf != NULL) { if (avp->av_bslot != -1) { sc->sc_bslot[avp->av_bslot] = NULL; sc->sc_nbcnvaps--; } ath_beacon_return(sc, avp->av_bcbuf); avp->av_bcbuf = NULL; if (sc->sc_nbcnvaps == 0) { sc->sc_stagbeacons = 0; if (sc->sc_hastsfadd) ath_hal_settsfadjust(sc->sc_ah, 0); } /* * Reclaim any pending mcast frames for the vap. */ ath_tx_draintxq(sc, &avp->av_mcastq); } /* * Update bookkeeping. */ if (vap->iv_opmode == IEEE80211_M_STA) { sc->sc_nstavaps--; if (sc->sc_nstavaps == 0 && sc->sc_swbmiss) sc->sc_swbmiss = 0; } else if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) { reclaim_address(sc, vap->iv_myaddr); ath_hal_setbssidmask(ah, sc->sc_hwbssidmask); if (vap->iv_opmode == IEEE80211_M_MBSS) sc->sc_nmeshvaps--; } if (vap->iv_opmode != IEEE80211_M_WDS) sc->sc_nvaps--; #ifdef IEEE80211_SUPPORT_TDMA /* TDMA operation ceases when the last vap is destroyed */ if (sc->sc_tdma && sc->sc_nvaps == 0) { sc->sc_tdma = 0; sc->sc_swbmiss = 0; } #endif free(avp, M_80211_VAP); if (sc->sc_running) { /* * Restart rx+tx machines if still running (RUNNING will * be reset if we just destroyed the last vap). */ if (ath_startrecv(sc) != 0) device_printf(sc->sc_dev, "%s: unable to restart recv logic\n", __func__); if (sc->sc_beacons) { /* restart beacons */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) ath_tdma_config(sc, NULL); else #endif ath_beacon_config(sc, NULL); } ath_hal_intrset(ah, sc->sc_imask); } /* Ok, let the hardware sleep.
*/ ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } void ath_suspend(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; sc->sc_resume_up = ic->ic_nrunning != 0; ieee80211_suspend_all(ic); /* * NB: don't worry about putting the chip in low power * mode; pci will power off our socket on suspend and * CardBus detaches the device. * * XXX TODO: well, that's great, except for non-cardbus * devices! */ /* * XXX This doesn't wait until all pending taskqueue * items and parallel transmit/receive/other threads * have finished running! */ ath_hal_intrset(sc->sc_ah, 0); taskqueue_block(sc->sc_tq); ATH_LOCK(sc); callout_stop(&sc->sc_cal_ch); ATH_UNLOCK(sc); /* * XXX ensure sc_invalid is 1 */ /* Disable the PCIe PHY, complete with workarounds */ ath_hal_enablepcie(sc->sc_ah, 1, 1); } /* * Reset the key cache since some parts do not reset the * contents on resume. First we clear all entries, then * re-load keys that the 802.11 layer assumes are setup * in h/w. */ static void ath_reset_keycache(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; int i; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); for (i = 0; i < sc->sc_keymax; i++) ath_hal_keyreset(ah, i); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ieee80211_crypto_reload_keys(ic); } /* * Fetch the current chainmask configuration based on the current * operating channel and options. */ static void ath_update_chainmasks(struct ath_softc *sc, struct ieee80211_channel *chan) { /* * Set TX chainmask to the currently configured chainmask; * the TX chainmask depends upon the current operating mode. */ sc->sc_cur_rxchainmask = sc->sc_rxchainmask; if (IEEE80211_IS_CHAN_HT(chan)) { sc->sc_cur_txchainmask = sc->sc_txchainmask; } else { sc->sc_cur_txchainmask = 1; } DPRINTF(sc, ATH_DEBUG_RESET, "%s: TX chainmask is now 0x%x, RX is now 0x%x\n", __func__, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); } void ath_resume(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; ath_hal_enablepcie(ah, 0, 0); /* * Must reset the chip before we reload the * keycache as we were powered down on suspend. */ ath_update_chainmasks(sc, sc->sc_curchan != NULL ? sc->sc_curchan : ic->ic_curchan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); /* Ensure we set the current power state to on */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); ath_power_set_power_state(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); ATH_UNLOCK(sc); ath_hal_reset(ah, sc->sc_opmode, sc->sc_curchan != NULL ? sc->sc_curchan : ic->ic_curchan, AH_FALSE, HAL_RESET_NORMAL, &status); ath_reset_keycache(sc); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, ic->ic_curchan); /* Let spectral at it in case spectral is enabled */ ath_spectral_enable(sc, ic->ic_curchan); /* * Let bluetooth coexistence at it in case it's needed for this channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips that * support it.
*/ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); /* Restore the LED configuration */ ath_led_config(sc); ath_hal_setledstate(ah, HAL_LED_INIT); if (sc->sc_resume_up) ieee80211_resume_all(ic); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); /* XXX beacons ? */ } void ath_shutdown(struct ath_softc *sc) { ATH_LOCK(sc); ath_stop(sc); ATH_UNLOCK(sc); /* NB: no point powering down chip as we're about to reboot */ } /* * Interrupt handler. Most of the actual processing is deferred. */ void ath_intr(void *arg) { struct ath_softc *sc = arg; struct ath_hal *ah = sc->sc_ah; HAL_INT status = 0; uint32_t txqs; /* * If we're inside a reset path, just print a warning and * clear the ISR. The reset routine will finish it for us. */ ATH_PCU_LOCK(sc); if (sc->sc_inreset_cnt) { HAL_INT status; ath_hal_getisr(ah, &status); /* clear ISR */ ath_hal_intrset(ah, 0); /* disable further intr's */ DPRINTF(sc, ATH_DEBUG_ANY, "%s: in reset, ignoring: status=0x%x\n", __func__, status); ATH_PCU_UNLOCK(sc); return; } if (sc->sc_invalid) { /* * The hardware is not ready/present, don't touch anything. * Note this can happen early on if the IRQ is shared. */ DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid; ignored\n", __func__); ATH_PCU_UNLOCK(sc); return; } if (!ath_hal_intrpend(ah)) { /* shared irq, not for us */ ATH_PCU_UNLOCK(sc); return; } ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); if (sc->sc_ic.ic_nrunning == 0 && sc->sc_running == 0) { HAL_INT status; DPRINTF(sc, ATH_DEBUG_ANY, "%s: ic_nrunning %d sc_running %d\n", __func__, sc->sc_ic.ic_nrunning, sc->sc_running); ath_hal_getisr(ah, &status); /* clear ISR */ ath_hal_intrset(ah, 0); /* disable further intr's */ ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return; } /* * Figure out the reason(s) for the interrupt. Note * that the hal returns a pseudo-ISR that may include * bits we haven't explicitly enabled so we mask the * value to insure we only process bits we requested. */ ath_hal_getisr(ah, &status); /* NB: clears ISR too */ DPRINTF(sc, ATH_DEBUG_INTR, "%s: status 0x%x\n", __func__, status); ATH_KTR(sc, ATH_KTR_INTERRUPTS, 1, "ath_intr: mask=0x%.8x", status); #ifdef ATH_DEBUG_ALQ if_ath_alq_post_intr(&sc->sc_alq, status, ah->ah_intrstate, ah->ah_syncstate); #endif /* ATH_DEBUG_ALQ */ #ifdef ATH_KTR_INTR_DEBUG ATH_KTR(sc, ATH_KTR_INTERRUPTS, 5, "ath_intr: ISR=0x%.8x, ISR_S0=0x%.8x, ISR_S1=0x%.8x, ISR_S2=0x%.8x, ISR_S5=0x%.8x", ah->ah_intrstate[0], ah->ah_intrstate[1], ah->ah_intrstate[2], ah->ah_intrstate[3], ah->ah_intrstate[6]); #endif /* Squirrel away SYNC interrupt debugging */ if (ah->ah_syncstate != 0) { int i; for (i = 0; i < 32; i++) if (ah->ah_syncstate & (1 << i)) sc->sc_intr_stats.sync_intr[i]++; } status &= sc->sc_imask; /* discard unasked for bits */ /* Short-circuit un-handled interrupts */ if (status == 0x0) { ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return; } /* * Take a note that we're inside the interrupt handler, so * the reset routines know to wait. */ sc->sc_intr_cnt++; ATH_PCU_UNLOCK(sc); /* * Handle the interrupt. We won't run concurrent with the reset * or channel change routines as they'll wait for sc_intr_cnt * to be 0 before continuing. */
*/ if (status & HAL_INT_FATAL) { sc->sc_stats.ast_hardware++; ath_hal_intrset(ah, 0); /* disable intr's until reset */ taskqueue_enqueue(sc->sc_tq, &sc->sc_fataltask); } else { if (status & HAL_INT_SWBA) { /* * Software beacon alert--time to send a beacon. * Handle beacon transmission directly; deferring * this is too slow to meet timing constraints * under load. */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) { if (sc->sc_tdmaswba == 0) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ath_tdma_beacon_send(sc, vap); sc->sc_tdmaswba = vap->iv_tdma->tdma_bintval; } else sc->sc_tdmaswba--; } else #endif { ath_beacon_proc(sc, 0); #ifdef IEEE80211_SUPPORT_SUPERG /* * Schedule the rx taskq in case there's no * traffic so any frames held on the staging * queue are aged and potentially flushed. */ sc->sc_rx.recv_sched(sc, 1); #endif } } if (status & HAL_INT_RXEOL) { int imask; ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_intr: RXEOL"); if (! sc->sc_isedma) { ATH_PCU_LOCK(sc); /* * NB: the hardware should re-read the link when * RXE bit is written, but it doesn't work at * least on older hardware revs. */ sc->sc_stats.ast_rxeol++; /* * Disable RXEOL/RXORN - prevent an interrupt * storm until the PCU logic can be reset. * In case the interface is reset some other * way before "sc_kickpcu" is called, don't * modify sc_imask - that way if it is reset * by a call to ath_reset() somehow, the * interrupt mask will be correctly reprogrammed. */ imask = sc->sc_imask; imask &= ~(HAL_INT_RXEOL | HAL_INT_RXORN); ath_hal_intrset(ah, imask); /* * Only blank sc_rxlink if we've not yet kicked * the PCU. * * This isn't entirely correct - the correct solution * would be to have a PCU lock and engage that for * the duration of the PCU fiddling; which would include * running the RX process. Otherwise we could end up * messing up the RX descriptor chain and making the * RX desc list much shorter. */ if (! sc->sc_kickpcu) sc->sc_rxlink = NULL; sc->sc_kickpcu = 1; ATH_PCU_UNLOCK(sc); } /* * Enqueue an RX proc to handle whatever * is in the RX queue. * This will then kick the PCU if required. */ sc->sc_rx.recv_sched(sc, 1); } if (status & HAL_INT_TXURN) { sc->sc_stats.ast_txurn++; /* bump tx trigger level */ ath_hal_updatetxtriglevel(ah, AH_TRUE); } /* * Handle both the legacy and RX EDMA interrupt bits. * Note that HAL_INT_RXLP is also HAL_INT_RXDESC. */ if (status & (HAL_INT_RX | HAL_INT_RXHP | HAL_INT_RXLP)) { sc->sc_stats.ast_rx_intr++; sc->sc_rx.recv_sched(sc, 1); } if (status & HAL_INT_TX) { sc->sc_stats.ast_tx_intr++; /* * Grab all the currently set bits in the HAL txq bitmap * and blank them. This is the only place we should be * doing this. */ if (! sc->sc_isedma) { ATH_PCU_LOCK(sc); txqs = 0xffffffff; ath_hal_gettxintrtxqs(sc->sc_ah, &txqs); ATH_KTR(sc, ATH_KTR_INTERRUPTS, 3, "ath_intr: TX; txqs=0x%08x, txq_active was 0x%08x, now 0x%08x", txqs, sc->sc_txq_active, sc->sc_txq_active | txqs); sc->sc_txq_active |= txqs; ATH_PCU_UNLOCK(sc); } taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask); } if (status & HAL_INT_BMISS) { sc->sc_stats.ast_bmiss++; taskqueue_enqueue(sc->sc_tq, &sc->sc_bmisstask); } if (status & HAL_INT_GTT) sc->sc_stats.ast_tx_timeout++; if (status & HAL_INT_CST) sc->sc_stats.ast_tx_cst++; if (status & HAL_INT_MIB) { sc->sc_stats.ast_mib++; ATH_PCU_LOCK(sc); /* * Disable interrupts until we service the MIB * interrupt; otherwise it will continue to fire. */ ath_hal_intrset(ah, 0); /* * Let the hal handle the event. 
We assume it will * clear whatever condition caused the interrupt. */ ath_hal_mibevent(ah, &sc->sc_halstats); /* * Don't reset the interrupt if we've just * kicked the PCU, or we may get a nested * RXEOL before the rxproc has had a chance * to run. */ if (sc->sc_kickpcu == 0) ath_hal_intrset(ah, sc->sc_imask); ATH_PCU_UNLOCK(sc); } if (status & HAL_INT_RXORN) { /* NB: hal marks HAL_INT_FATAL when RXORN is fatal */ ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_intr: RXORN"); sc->sc_stats.ast_rxorn++; } if (status & HAL_INT_TSFOOR) { + /* out of range beacon - wake the chip up, + * but don't modify self-gen frame config */ device_printf(sc->sc_dev, "%s: TSFOOR\n", __func__); sc->sc_syncbeacon = 1; + ATH_LOCK(sc); + ath_power_setpower(sc, HAL_PM_AWAKE, 0); + ATH_UNLOCK(sc); } if (status & HAL_INT_MCI) { ath_btcoex_mci_intr(sc); } } ATH_PCU_LOCK(sc); sc->sc_intr_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } static void ath_fatal_proc(void *arg, int pending) { struct ath_softc *sc = arg; u_int32_t *state; u_int32_t len; void *sp; if (sc->sc_invalid) return; device_printf(sc->sc_dev, "hardware error; resetting\n"); /* * Fatal errors are unrecoverable. Typically these * are caused by DMA errors. Collect h/w state from * the hal so we can diagnose what's going on. */ if (ath_hal_getfatalstate(sc->sc_ah, &sp, &len)) { KASSERT(len >= 6*sizeof(u_int32_t), ("len %u bytes", len)); state = sp; device_printf(sc->sc_dev, "0x%08x 0x%08x 0x%08x, 0x%08x 0x%08x 0x%08x\n", state[0], state[1] , state[2], state[3], state[4], state[5]); } ath_reset(sc, ATH_RESET_NOLOSS); } static void ath_bmiss_vap(struct ieee80211vap *vap) { struct ath_softc *sc = vap->iv_ic->ic_softc; /* * Workaround phantom bmiss interrupts by sanity-checking * the time of our last rx'd frame. If it is within the * beacon miss interval then ignore the interrupt. If it's * truly a bmiss we'll get another interrupt soon and that'll * be dispatched up for processing. Note this applies only * for h/w beacon miss events. */ /* * XXX TODO: Just read the TSF during the interrupt path; * that way we don't have to wake up again just to read it * again. */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); if ((vap->iv_flags_ext & IEEE80211_FEXT_SWBMISS) == 0) { u_int64_t lastrx = sc->sc_lastrx; u_int64_t tsf = ath_hal_gettsf64(sc->sc_ah); /* XXX should take a locked ref to iv_bss */ u_int bmisstimeout = vap->iv_bmissthreshold * vap->iv_bss->ni_intval * 1024; DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf %llu lastrx %lld (%llu) bmiss %u\n", __func__, (unsigned long long) tsf, (unsigned long long)(tsf - lastrx), (unsigned long long) lastrx, bmisstimeout); if (tsf - lastrx <= bmisstimeout) { sc->sc_stats.ast_bmiss_phantom++; ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return; } } /* - * There's no need to keep the hardware awake during the call - * to av_bmiss(). + * Keep the hardware awake if it's asleep (and leave self-gen + * frame config alone) until the next beacon, so we can resync + * against the next beacon. + * + * This handles three common beacon miss cases in STA powersave mode - + * (a) the beacon TBTT isn't a multiple of bintval; + * (b) the beacon was missed; and + * (c) the beacons are being delayed because the AP is busy and + * isn't reliably able to meet its TBTT.
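+ *
+ * For scale, with typical (hypothetical) numbers - iv_bmissthreshold
+ * of 7 and a 100 TU beacon interval - the phantom-bmiss window
+ * computed earlier is 7 * 100 * 1024 = 716800 usec of TSF time, so
+ * roughly 0.7 seconds must elapse without a received frame before
+ * the miss is treated as real and handed to av_bmiss().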
*/ ATH_LOCK(sc); + ath_power_setpower(sc, HAL_PM_AWAKE, 0); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); + DPRINTF(sc, ATH_DEBUG_BEACON, + "%s: forced awake; force syncbeacon=1\n", __func__); /* * Attempt to force a beacon resync. */ sc->sc_syncbeacon = 1; ATH_VAP(vap)->av_bmiss(vap); } /* XXX this needs a force wakeup! */ int ath_hal_gethangstate(struct ath_hal *ah, uint32_t mask, uint32_t *hangs) { uint32_t rsize; void *sp; if (!ath_hal_getdiagstate(ah, HAL_DIAG_CHECK_HANGS, &mask, sizeof(mask), &sp, &rsize)) return 0; KASSERT(rsize == sizeof(uint32_t), ("resultsize %u", rsize)); *hangs = *(uint32_t *)sp; return 1; } static void ath_bmiss_proc(void *arg, int pending) { struct ath_softc *sc = arg; uint32_t hangs; DPRINTF(sc, ATH_DEBUG_ANY, "%s: pending %u\n", __func__, pending); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ath_beacon_miss(sc); /* * Do a reset upon any beacon miss event. * * It may be a non-recognised RX clear hang which needs a reset * to clear. */ if (ath_hal_gethangstate(sc->sc_ah, 0xff, &hangs) && hangs != 0) { ath_reset(sc, ATH_RESET_NOLOSS); device_printf(sc->sc_dev, "bb hang detected (0x%x), resetting\n", hangs); } else { ath_reset(sc, ATH_RESET_NOLOSS); ieee80211_beacon_miss(&sc->sc_ic); } /* Force a beacon resync, in case they've drifted */ sc->sc_syncbeacon = 1; ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } /* * Handle TKIP MIC setup to deal with hardware that doesn't do MIC * calcs together with WME. If necessary disable the crypto * hardware and mark the 802.11 state so keys will be setup * with the MIC work done in software. */ static void ath_settkipmic(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; if ((ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP) && !sc->sc_wmetkipmic) { if (ic->ic_flags & IEEE80211_F_WME) { ath_hal_settkipmic(sc->sc_ah, AH_FALSE); ic->ic_cryptocaps &= ~IEEE80211_CRYPTO_TKIPMIC; } else { ath_hal_settkipmic(sc->sc_ah, AH_TRUE); ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIPMIC; } } } static int ath_init(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; ATH_LOCK_ASSERT(sc); /* * Force the sleep state awake. */ ath_power_setselfgen(sc, HAL_PM_AWAKE); ath_power_set_power_state(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); /* * Stop anything previously setup. This is safe * whether this is the first time through or not. */ ath_stop(sc); /* * The basic interface to setting the hardware in a good * state is ``reset''. On return the hardware is known to * be powered up and with interrupts disabled. This must * be followed by initialization of the appropriate bits * and then setup of the interrupt mask.
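 *
 * In outline, the sequence below is:
 *
 *	ath_settkipmic()	- TKIP MIC handling for this config
 *	ath_hal_reset()		- power up and reset the chip
 *	ath_chan_change()	- per-channel driver state
 *	ath_startrecv()		- restart the receive machinery
 *	ath_hal_intrset()	- finally unmask interrupts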
*/ ath_settkipmic(sc); ath_update_chainmasks(sc, ic->ic_curchan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_FALSE, HAL_RESET_NORMAL, &status)) { device_printf(sc->sc_dev, "unable to reset hardware; hal status %u\n", status); return (ENODEV); } ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); ath_chan_change(sc, ic->ic_curchan); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, ic->ic_curchan); /* Let spectral at it in case spectral is enabled */ ath_spectral_enable(sc, ic->ic_curchan); /* * Let bluetooth coexistence at it in case it's needed for this channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips that * support it. */ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); /* * Likewise this is set during reset so update * state cached in the driver. */ sc->sc_diversity = ath_hal_getdiversity(ah); sc->sc_lastlongcal = ticks; sc->sc_resetcal = 1; sc->sc_lastcalreset = 0; sc->sc_lastani = ticks; sc->sc_lastshortcal = ticks; sc->sc_doresetcal = AH_FALSE; /* * Beacon timers were cleared here; give ath_newstate() * a hint that the beacon timers should be poked when * things transition to the RUN state. */ sc->sc_beacons = 0; /* * Setup the hardware after reset: the key cache * is filled as needed and the receive engine is * set going. Frame transmit is handled entirely * in the frame output path; there's nothing to do * here except setup the interrupt mask. */ if (ath_startrecv(sc) != 0) { device_printf(sc->sc_dev, "unable to start recv logic\n"); ath_power_restore_power_state(sc); return (ENODEV); } /* * Enable interrupts. */ sc->sc_imask = HAL_INT_RX | HAL_INT_TX | HAL_INT_RXORN | HAL_INT_TXURN | HAL_INT_FATAL | HAL_INT_GLOBAL; /* * Enable RX EDMA bits. Note these overlap with * HAL_INT_RX and HAL_INT_RXDESC respectively. */ if (sc->sc_isedma) sc->sc_imask |= (HAL_INT_RXHP | HAL_INT_RXLP); /* * If we're an EDMA NIC, we don't care about RXEOL. * Writing a new descriptor in will simply restart * RX DMA. */ if (! sc->sc_isedma) sc->sc_imask |= HAL_INT_RXEOL; /* * Enable MCI interrupt for MCI devices. */ if (sc->sc_btcoex_mci) sc->sc_imask |= HAL_INT_MCI; /* * Enable MIB interrupts when there are hardware phy counters. * Note we only do this (at the moment) for station mode. */ if (sc->sc_needmib && ic->ic_opmode == IEEE80211_M_STA) sc->sc_imask |= HAL_INT_MIB; /* * XXX add capability for this. * * If we're in STA mode (and maybe IBSS?) then register for * TSFOOR interrupts. */ if (ic->ic_opmode == IEEE80211_M_STA) sc->sc_imask |= HAL_INT_TSFOOR; /* Enable global TX timeout and carrier sense timeout if available */ if (ath_hal_gtxto_supported(ah)) sc->sc_imask |= HAL_INT_GTT; DPRINTF(sc, ATH_DEBUG_RESET, "%s: imask=0x%x\n", __func__, sc->sc_imask); sc->sc_running = 1; callout_reset(&sc->sc_wd_ch, hz, ath_watchdog, sc); ath_hal_intrset(ah, sc->sc_imask); ath_power_restore_power_state(sc); return (0); } static void ath_stop(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; ATH_LOCK_ASSERT(sc); /* * Wake the hardware up before fiddling with it.
*/ ath_power_set_power_state(sc, HAL_PM_AWAKE); if (sc->sc_running) { /* * Shutdown the hardware and driver: * reset 802.11 state machine * turn off timers * disable interrupts * turn off the radio * clear transmit machinery * clear receive machinery * drain and release tx queues * reclaim beacon resources * power down hardware * * Note that some of this work is not possible if the * hardware is gone (invalid). */ #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->stop(sc->sc_tx99); #endif callout_stop(&sc->sc_wd_ch); sc->sc_wd_timer = 0; sc->sc_running = 0; if (!sc->sc_invalid) { if (sc->sc_softled) { callout_stop(&sc->sc_ledtimer); ath_hal_gpioset(ah, sc->sc_ledpin, !sc->sc_ledon); sc->sc_blinking = 0; } ath_hal_intrset(ah, 0); } /* XXX we should stop RX regardless of whether it's valid */ if (!sc->sc_invalid) { ath_stoprecv(sc, 1); ath_hal_phydisable(ah); } else sc->sc_rxlink = NULL; ath_draintxq(sc, ATH_RESET_DEFAULT); ath_beacon_free(sc); /* XXX not needed */ } /* And now, restore the current power state */ ath_power_restore_power_state(sc); } /* * Wait until all pending TX/RX has completed. * * This waits until all existing transmit, receive and interrupts * have completed. It's assumed that the caller has first * grabbed the reset lock so it doesn't try to do overlapping * chip resets. */ #define MAX_TXRX_ITERATIONS 100 static void ath_txrx_stop_locked(struct ath_softc *sc) { int i = MAX_TXRX_ITERATIONS; ATH_UNLOCK_ASSERT(sc); ATH_PCU_LOCK_ASSERT(sc); /* * Sleep until all the pending operations have completed. * * The caller must ensure that reset has been incremented * or the pending operations may continue being queued. */ while (sc->sc_rxproc_cnt || sc->sc_txproc_cnt || sc->sc_txstart_cnt || sc->sc_intr_cnt) { if (i <= 0) break; msleep(sc, &sc->sc_pcu_mtx, 0, "ath_txrx_stop", msecs_to_ticks(10)); i--; } if (i <= 0) device_printf(sc->sc_dev, "%s: didn't finish after %d iterations\n", __func__, MAX_TXRX_ITERATIONS); } #undef MAX_TXRX_ITERATIONS #if 0 static void ath_txrx_stop(struct ath_softc *sc) { ATH_UNLOCK_ASSERT(sc); ATH_PCU_UNLOCK_ASSERT(sc); ATH_PCU_LOCK(sc); ath_txrx_stop_locked(sc); ATH_PCU_UNLOCK(sc); } #endif static void ath_txrx_start(struct ath_softc *sc) { taskqueue_unblock(sc->sc_tq); } /* * Grab the reset lock, and wait around until no one else * is trying to do anything with it. * * This is totally horrible but we can't hold this lock for * long enough to do TX/RX or we end up with net80211/ip stack * LORs and eventual deadlock. * * "dowait" signals whether to spin, waiting for the reset * lock count to reach 0. This should (for now) only be used * during the reset path, as the rest of the code may not * be locking-reentrant enough to behave correctly. * * Another, cleaner way should be found to serialise all of * these operations. */ #define MAX_RESET_ITERATIONS 25 static int ath_reset_grablock(struct ath_softc *sc, int dowait) { int w = 0; int i = MAX_RESET_ITERATIONS; ATH_PCU_LOCK_ASSERT(sc); do { if (sc->sc_inreset_cnt == 0) { w = 1; break; } if (dowait == 0) { w = 0; break; } ATH_PCU_UNLOCK(sc); /* * 1 tick is likely not enough time for long calibrations * to complete. So we should wait quite a while. */ pause("ath_reset_grablock", msecs_to_ticks(100)); i--; ATH_PCU_LOCK(sc); } while (i > 0); /* * We always increment the refcounter, regardless * of whether we succeeded to get it in an exclusive * way. 
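To make the grab-lock semantics described above concrete, here is a minimal userland sketch of the same refcount-gated pattern. The names and the pthread mutex are hypothetical stand-ins (the driver uses ATH_PCU_LOCK and pause()); this illustrates the idea only, not the driver's actual code.

#include <pthread.h>
#include <unistd.h>

struct sketch_softc {
	pthread_mutex_t pcu_mtx;	/* stands in for ATH_PCU_LOCK */
	int inreset_cnt;
};

/*
 * Returns 1 if we won exclusive access, 0 on a recursive/concurrent
 * entry.  The counter is bumped either way, so the caller must
 * decrement it once the reset is done.
 */
static int
sketch_reset_grablock(struct sketch_softc *sc, int dowait)
{
	int i = 25, w = 0;

	pthread_mutex_lock(&sc->pcu_mtx);
	do {
		if (sc->inreset_cnt == 0) {
			w = 1;
			break;
		}
		if (dowait == 0)
			break;
		/* Drop the lock while sleeping, as the driver does. */
		pthread_mutex_unlock(&sc->pcu_mtx);
		usleep(100 * 1000);	/* give long calibrations time */
		pthread_mutex_lock(&sc->pcu_mtx);
	} while (--i > 0);
	sc->inreset_cnt++;		/* always incremented */
	pthread_mutex_unlock(&sc->pcu_mtx);
	return (w);
}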
*/ sc->sc_inreset_cnt++; if (i <= 0) device_printf(sc->sc_dev, "%s: didn't finish after %d iterations\n", __func__, MAX_RESET_ITERATIONS); if (w == 0) device_printf(sc->sc_dev, "%s: warning, recursive reset path!\n", __func__); return w; } #undef MAX_RESET_ITERATIONS /* * Reset the hardware w/o losing operational state. This is * basically a more efficient way of doing ath_stop, ath_init, * followed by state transitions to the current 802.11 * operational state. Used to recover from various errors and * to reset or reload hardware state. */ int ath_reset(struct ath_softc *sc, ATH_RESET_TYPE reset_type) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; int i; DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__); /* Ensure ATH_LOCK isn't held; ath_rx_proc can't be locked */ ATH_PCU_UNLOCK_ASSERT(sc); ATH_UNLOCK_ASSERT(sc); /* Try to stop any further TX/RX from occurring */ taskqueue_block(sc->sc_tq); /* * Wake the hardware up. */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); /* * Grab the reset lock before TX/RX is stopped. * * This is needed to ensure that when the TX/RX actually does finish, * no further TX/RX/reset runs in parallel with this. */ if (ath_reset_grablock(sc, 1) == 0) { device_printf(sc->sc_dev, "%s: concurrent reset! Danger!\n", __func__); } /* disable interrupts */ ath_hal_intrset(ah, 0); /* * Now, ensure that any in progress TX/RX completes before we * continue. */ ath_txrx_stop_locked(sc); ATH_PCU_UNLOCK(sc); /* * Regardless of whether we're doing a no-loss flush or * not, stop the PCU and handle what's in the RX queue. * That way frames aren't dropped which shouldn't be. */ ath_stoprecv(sc, (reset_type != ATH_RESET_NOLOSS)); ath_rx_flush(sc); /* * Should now wait for pending TX/RX to complete * and block future ones from occurring. This needs to be * done before the TX queue is drained. */ ath_draintxq(sc, reset_type); /* stop xmit side */ ath_settkipmic(sc); /* configure TKIP MIC handling */ /* NB: indicate channel change so we do a full reset */ ath_update_chainmasks(sc, ic->ic_curchan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_TRUE, HAL_RESET_NORMAL, &status)) device_printf(sc->sc_dev, "%s: unable to reset hardware; hal status %u\n", __func__, status); sc->sc_diversity = ath_hal_getdiversity(ah); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, ic->ic_curchan); /* Let spectral at in case spectral is enabled */ ath_spectral_enable(sc, ic->ic_curchan); /* * Let bluetooth coexistence at in case it's needed for this channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips that * support it. */ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); if (ath_startrecv(sc) != 0) /* restart recv */ device_printf(sc->sc_dev, "%s: unable to start recv logic\n", __func__); /* * We may be doing a reset in response to an ioctl * that changes the channel so update any state that * might change as a result. */ ath_chan_change(sc, ic->ic_curchan); if (sc->sc_beacons) { /* restart beacons */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) ath_tdma_config(sc, NULL); else #endif ath_beacon_config(sc, NULL); } /* * Release the reset lock and re-enable interrupts here.
* If an interrupt was being processed in ath_intr(), * it would disable interrupts at this point. So we have * to atomically enable interrupts and decrement the * reset counter - this way ath_intr() doesn't end up * disabling interrupts without a corresponding enable * in the reset or channel change path. * * Grab the TX reference in case we need to transmit. * That way a parallel transmit doesn't. */ ATH_PCU_LOCK(sc); sc->sc_inreset_cnt--; sc->sc_txstart_cnt++; /* XXX only do this if sc_inreset_cnt == 0? */ ath_hal_intrset(ah, sc->sc_imask); ATH_PCU_UNLOCK(sc); /* * TX and RX can be started here. If it were started with * sc_inreset_cnt > 0, the TX and RX path would abort. * Thus if this is a nested call through the reset or * channel change code, TX completion will occur but * RX completion and ath_start / ath_tx_start will not * run. */ /* Restart TX/RX as needed */ ath_txrx_start(sc); /* XXX TODO: we need to hold the tx refcount here! */ /* Restart TX completion and pending TX */ if (reset_type == ATH_RESET_NOLOSS) { for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); ath_txq_restart_dma(sc, &sc->sc_txq[i]); ATH_TXQ_UNLOCK(&sc->sc_txq[i]); ATH_TX_LOCK(sc); ath_txq_sched(sc, &sc->sc_txq[i]); ATH_TX_UNLOCK(sc); } } } ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); sc->sc_txstart_cnt--; ATH_PCU_UNLOCK(sc); /* Handle any frames in the TX queue */ /* * XXX should this be done by the caller, rather than * ath_reset() ? */ ath_tx_kick(sc); /* restart xmit */ return 0; } static int ath_reset_vap(struct ieee80211vap *vap, u_long cmd) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; switch (cmd) { case IEEE80211_IOC_TXPOWER: /* * If per-packet TPC is enabled, then we have nothing * to do; otherwise we need to force the global limit. * All this can happen directly; no need to reset. */ if (!ath_hal_gettpc(ah)) ath_hal_settxpowlimit(ah, ic->ic_txpowlimit); return 0; } /* XXX? Full or NOLOSS? */ return ath_reset(sc, ATH_RESET_FULL); } struct ath_buf * _ath_getbuf_locked(struct ath_softc *sc, ath_buf_type_t btype) { struct ath_buf *bf; ATH_TXBUF_LOCK_ASSERT(sc); if (btype == ATH_BUFTYPE_MGMT) bf = TAILQ_FIRST(&sc->sc_txbuf_mgmt); else bf = TAILQ_FIRST(&sc->sc_txbuf); if (bf == NULL) { sc->sc_stats.ast_tx_getnobuf++; } else { if (bf->bf_flags & ATH_BUF_BUSY) { sc->sc_stats.ast_tx_getbusybuf++; bf = NULL; } } if (bf != NULL && (bf->bf_flags & ATH_BUF_BUSY) == 0) { if (btype == ATH_BUFTYPE_MGMT) TAILQ_REMOVE(&sc->sc_txbuf_mgmt, bf, bf_list); else { TAILQ_REMOVE(&sc->sc_txbuf, bf, bf_list); sc->sc_txbuf_cnt--; /* * This shouldn't happen; however just to be * safe print a warning and fudge the txbuf * count. */ if (sc->sc_txbuf_cnt < 0) { device_printf(sc->sc_dev, "%s: sc_txbuf_cnt < 0?\n", __func__); sc->sc_txbuf_cnt = 0; } } } else bf = NULL; if (bf == NULL) { /* XXX should check which list, mgmt or otherwise */ DPRINTF(sc, ATH_DEBUG_XMIT, "%s: %s\n", __func__, TAILQ_FIRST(&sc->sc_txbuf) == NULL ?
"out of xmit buffers" : "xmit buffer busy"); return NULL; } /* XXX TODO: should do this at buffer list initialisation */ /* XXX (then, ensure the buffer has the right flag set) */ bf->bf_flags = 0; if (btype == ATH_BUFTYPE_MGMT) bf->bf_flags |= ATH_BUF_MGMT; else bf->bf_flags &= (~ATH_BUF_MGMT); /* Valid bf here; clear some basic fields */ bf->bf_next = NULL; /* XXX just to be sure */ bf->bf_last = NULL; /* XXX again, just to be sure */ bf->bf_comp = NULL; /* XXX again, just to be sure */ bzero(&bf->bf_state, sizeof(bf->bf_state)); /* * Track the descriptor ID only if doing EDMA */ if (sc->sc_isedma) { bf->bf_descid = sc->sc_txbuf_descid; sc->sc_txbuf_descid++; } return bf; } /* * When retrying a software frame, buffers marked ATH_BUF_BUSY * can't be thrown back on the queue as they could still be * in use by the hardware. * * This duplicates the buffer, or returns NULL. * * The descriptor is also copied but the link pointers and * the DMA segments aren't copied; this frame should thus * be again passed through the descriptor setup/chain routines * so the link is correct. * * The caller must free the buffer using ath_freebuf(). */ struct ath_buf * ath_buf_clone(struct ath_softc *sc, struct ath_buf *bf) { struct ath_buf *tbf; tbf = ath_getbuf(sc, (bf->bf_flags & ATH_BUF_MGMT) ? ATH_BUFTYPE_MGMT : ATH_BUFTYPE_NORMAL); if (tbf == NULL) return NULL; /* XXX failure? Why? */ /* Copy basics */ tbf->bf_next = NULL; tbf->bf_nseg = bf->bf_nseg; tbf->bf_flags = bf->bf_flags & ATH_BUF_FLAGS_CLONE; tbf->bf_status = bf->bf_status; tbf->bf_m = bf->bf_m; tbf->bf_node = bf->bf_node; KASSERT((bf->bf_node != NULL), ("%s: bf_node=NULL!", __func__)); /* will be setup by the chain/setup function */ tbf->bf_lastds = NULL; /* for now, last == self */ tbf->bf_last = tbf; tbf->bf_comp = bf->bf_comp; /* NOTE: DMA segments will be setup by the setup/chain functions */ /* The caller has to re-init the descriptor + links */ /* * Free the DMA mapping here, before we NULL the mbuf. * We must only call bus_dmamap_unload() once per mbuf chain * or behaviour is undefined. */ if (bf->bf_m != NULL) { /* * XXX is this POSTWRITE call required? */ bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); } bf->bf_m = NULL; bf->bf_node = NULL; /* Copy state */ memcpy(&tbf->bf_state, &bf->bf_state, sizeof(bf->bf_state)); return tbf; } struct ath_buf * ath_getbuf(struct ath_softc *sc, ath_buf_type_t btype) { struct ath_buf *bf; ATH_TXBUF_LOCK(sc); bf = _ath_getbuf_locked(sc, btype); /* * If a mgmt buffer was requested but we're out of those, * try requesting a normal one. */ if (bf == NULL && btype == ATH_BUFTYPE_MGMT) bf = _ath_getbuf_locked(sc, ATH_BUFTYPE_NORMAL); ATH_TXBUF_UNLOCK(sc); if (bf == NULL) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: stop queue\n", __func__); sc->sc_stats.ast_tx_qstop++; } return bf; } /* * Transmit a single frame. * * net80211 will free the node reference if the transmit * fails, so don't free the node reference here. */ static int ath_transmit(struct ieee80211com *ic, struct mbuf *m) { struct ath_softc *sc = ic->ic_softc; struct ieee80211_node *ni; struct mbuf *next; struct ath_buf *bf; ath_bufhead frags; int retval = 0; /* * Tell the reset path that we're currently transmitting. 
*/ ATH_PCU_LOCK(sc); if (sc->sc_inreset_cnt > 0) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: sc_inreset_cnt > 0; bailing\n", __func__); ATH_PCU_UNLOCK(sc); sc->sc_stats.ast_tx_qstop++; ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_task: OACTIVE, finish"); return (ENOBUFS); /* XXX should be EINVAL or? */ } sc->sc_txstart_cnt++; ATH_PCU_UNLOCK(sc); /* Wake the hardware up already */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TX, 0, "ath_transmit: start"); /* * Grab the TX lock - it's ok to do this here; we haven't * yet started transmitting. */ ATH_TX_LOCK(sc); /* * Node reference, if there's one. */ ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; /* * Enforce how deep a node queue can get. * * XXX it would be nicer if we kept an mbuf queue per * node and only whacked them into ath_bufs when we * are ready to schedule some traffic from them. * .. that may come later. * * XXX we should also track the per-node hardware queue * depth so it is easy to limit the _SUM_ of the swq and * hwq frames. Since we only schedule two HWQ frames * at a time, this should be OK for now. */ if ((!(m->m_flags & M_EAPOL)) && (ATH_NODE(ni)->an_swq_depth > sc->sc_txq_node_maxdepth)) { sc->sc_stats.ast_tx_nodeq_overflow++; retval = ENOBUFS; goto finish; } /* * Check how many TX buffers are available. * * If this is for non-EAPOL traffic, just leave some * space free in order for buffer cloning and raw * frame transmission to occur. * * If it's for EAPOL traffic, ignore this for now. * Management traffic will be sent via the raw transmit * method which bypasses this check. * * This is needed to ensure that EAPOL frames during * (re) keying have a chance to go out. * * See kern/138379 for more information. */ if ((!(m->m_flags & M_EAPOL)) && (sc->sc_txbuf_cnt <= sc->sc_txq_data_minfree)) { sc->sc_stats.ast_tx_nobuf++; retval = ENOBUFS; goto finish; } /* * Grab a TX buffer and associated resources. * * If it's an EAPOL frame, allocate a MGMT ath_buf. * That way temporary buffer exhaustion due to * the data path doesn't leave us without the ability * to transmit management frames. * * Otherwise allocate a normal buffer. */ if (m->m_flags & M_EAPOL) bf = ath_getbuf(sc, ATH_BUFTYPE_MGMT); else bf = ath_getbuf(sc, ATH_BUFTYPE_NORMAL); if (bf == NULL) { /* * If we failed to allocate a buffer, fail. * * We shouldn't fail normally, due to the check * above. */ sc->sc_stats.ast_tx_nobuf++; retval = ENOBUFS; goto finish; } /* * At this point we have a buffer; so we need to free it * if we hit any error conditions. */ /* * Check for fragmentation. If this frame * has been broken up verify we have enough * buffers to send all the fragments so all * go out or none... */ TAILQ_INIT(&frags); if ((m->m_flags & M_FRAG) && !ath_txfrag_setup(sc, &frags, m, ni)) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: out of txfrag buffers\n", __func__); sc->sc_stats.ast_tx_nofrag++; if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); /* * XXXGL: is mbuf valid after ath_txfrag_setup? If yes, * we shouldn't free it but return back. */ ieee80211_free_mbuf(m); m = NULL; goto bad; } /* * At this point if we have any TX fragments, then we will * have bumped the node reference once for each of those. */ /* * XXX Is there anything actually _enforcing_ that the * fragments are being transmitted in one hit, rather than * being interleaved with other transmissions on that * hardware queue? * * The ATH TX output lock is the only thing serialising this * right now.
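The two admission checks above boil down to a small reservation policy. A compact, self-contained sketch of that policy, with hypothetical names rather than the driver's API:

#include <errno.h>
#include <stdbool.h>

struct sketch_txpool {
	int txbuf_cnt;		/* free data buffers */
	int data_minfree;	/* headroom reserved for EAPOL/cloning */
	int swq_depth;		/* this node's software queue depth */
	int node_maxdepth;	/* per-node software queue limit */
};

static int
sketch_admit(const struct sketch_txpool *p, bool is_eapol)
{
	/* EAPOL frames bypass both checks and draw from the MGMT pool. */
	if (is_eapol)
		return (0);
	if (p->swq_depth > p->node_maxdepth)
		return (ENOBUFS);	/* node queue too deep */
	if (p->txbuf_cnt <= p->data_minfree)
		return (ENOBUFS);	/* keep headroom so rekeying works */
	return (0);
}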
*/ /* * Calculate the "next fragment" length field in ath_buf * in order to let the transmit path know enough about * what to next write to the hardware. */ if (m->m_flags & M_FRAG) { struct ath_buf *fbf = bf; struct ath_buf *n_fbf = NULL; struct mbuf *fm = m->m_nextpkt; /* * We need to walk the list of fragments and set * the next size to the following buffer. * However, the first buffer isn't in the frag * list, so we have to do some gymnastics here. */ TAILQ_FOREACH(n_fbf, &frags, bf_list) { fbf->bf_nextfraglen = fm->m_pkthdr.len; fbf = n_fbf; fm = fm->m_nextpkt; } } nextfrag: /* * Pass the frame to the h/w for transmission. * Fragmented frames have each frag chained together * with m_nextpkt. We know there are sufficient ath_buf's * to send all the frags because of work done by * ath_txfrag_setup. We leave m_nextpkt set while * calling ath_tx_start so it can use it to extend the * tx duration to cover the subsequent frag and * so it can reclaim all the mbufs in case of an error; * ath_tx_start clears m_nextpkt once it commits to * handing the frame to the hardware. * * Note: if this fails, then the mbufs are freed but * not the node reference. * * So, we now have to free the node reference ourselves here * and return OK up to the stack. */ next = m->m_nextpkt; if (ath_tx_start(sc, ni, bf, m)) { bad: if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); reclaim: bf->bf_m = NULL; bf->bf_node = NULL; ATH_TXBUF_LOCK(sc); ath_returnbuf_head(sc, bf); /* * Free the rest of the node references and * buffers for the fragment list. */ ath_txfrag_cleanup(sc, &frags, ni); ATH_TXBUF_UNLOCK(sc); /* * XXX: And free the node/return OK; ath_tx_start() may have * modified the buffer. We currently have no way to * signify that the mbuf was freed but there was an error. */ ieee80211_free_node(ni); retval = 0; goto finish; } /* * Check here if the node is in power save state. */ ath_tx_update_tim(sc, ni, 1); if (next != NULL) { /* * Beware of state changing between frags. * XXX check sta power-save state? */ if (ni->ni_vap->iv_state != IEEE80211_S_RUN) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: flush fragmented packet, state %s\n", __func__, ieee80211_state_name[ni->ni_vap->iv_state]); /* XXX dmamap */ ieee80211_free_mbuf(next); goto reclaim; } m = next; bf = TAILQ_FIRST(&frags); KASSERT(bf != NULL, ("no buf for txfrag")); TAILQ_REMOVE(&frags, bf, bf_list); goto nextfrag; } /* * Bump watchdog timer. */ sc->sc_wd_timer = 5; finish: ATH_TX_UNLOCK(sc); /* * Finished transmitting! */ ATH_PCU_LOCK(sc); sc->sc_txstart_cnt--; ATH_PCU_UNLOCK(sc); /* Sleep the hardware if required */ ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TX, 0, "ath_transmit: finished"); return (retval); } static int ath_media_change(struct ifnet *ifp) { int error = ieee80211_media_change(ifp); /* NB: only the fixed rate can change and that doesn't need a reset */ return (error == ENETRESET ? 0 : error); } /* * Block/unblock tx+rx processing while a key change is done. * We assume the caller serializes key management operations * so we only need to worry about synchronization with other * uses that originate in the driver.
*/ static void ath_key_update_begin(struct ieee80211vap *vap) { struct ath_softc *sc = vap->iv_ic->ic_softc; DPRINTF(sc, ATH_DEBUG_KEYCACHE, "%s:\n", __func__); taskqueue_block(sc->sc_tq); } static void ath_key_update_end(struct ieee80211vap *vap) { struct ath_softc *sc = vap->iv_ic->ic_softc; DPRINTF(sc, ATH_DEBUG_KEYCACHE, "%s:\n", __func__); taskqueue_unblock(sc->sc_tq); } static void ath_update_promisc(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; u_int32_t rfilt; /* configure rx filter */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); rfilt = ath_calcrxfilter(sc); ath_hal_setrxfilter(sc->sc_ah, rfilt); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_MODE, "%s: RX filter 0x%x\n", __func__, rfilt); } /* * Driver-internal mcast update call. * * Assumes the hardware is already awake. */ static void ath_update_mcast_hw(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; u_int32_t mfilt[2]; /* calculate and install multicast filter */ if (ic->ic_allmulti == 0) { struct ieee80211vap *vap; struct ifnet *ifp; struct ifmultiaddr *ifma; /* * Merge multicast addresses to form the hardware filter. */ mfilt[0] = mfilt[1] = 0; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { caddr_t dl; uint32_t val; uint8_t pos; /* calculate XOR of eight 6bit values */ dl = LLADDR((struct sockaddr_dl *) ifma->ifma_addr); val = le32dec(dl + 0); pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; val = le32dec(dl + 3); pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; pos &= 0x3f; mfilt[pos / 32] |= (1 << (pos % 32)); } if_maddr_runlock(ifp); } } else mfilt[0] = mfilt[1] = ~0; ath_hal_setmcastfilter(sc->sc_ah, mfilt[0], mfilt[1]); DPRINTF(sc, ATH_DEBUG_MODE, "%s: MC filter %08x:%08x\n", __func__, mfilt[0], mfilt[1]); } /* * Called from the net80211 layer - force the hardware * awake before operating. */ static void ath_update_mcast(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ath_update_mcast_hw(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } void ath_mode_init(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; /* configure rx filter */ rfilt = ath_calcrxfilter(sc); ath_hal_setrxfilter(ah, rfilt); /* configure operational mode */ ath_hal_setopmode(ah); /* handle any link-level address change */ ath_hal_setmac(ah, ic->ic_macaddr); /* calculate and install multicast filter */ ath_update_mcast_hw(sc); } /* * Set the slot time based on the current setting. */ void ath_setslottime(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; u_int usec; if (IEEE80211_IS_CHAN_HALF(ic->ic_curchan)) usec = 13; else if (IEEE80211_IS_CHAN_QUARTER(ic->ic_curchan)) usec = 21; else if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) { /* honor short/long slot time only in 11g */ /* XXX shouldn't honor on pure g or turbo g channel */ if (ic->ic_flags & IEEE80211_F_SHSLOT) usec = HAL_SLOT_TIME_9; else usec = HAL_SLOT_TIME_20; } else usec = HAL_SLOT_TIME_9; DPRINTF(sc, ATH_DEBUG_RESET, "%s: chan %u MHz flags 0x%x %s slot, %u usec\n", __func__, ic->ic_curchan->ic_freq, ic->ic_curchan->ic_flags, ic->ic_flags & IEEE80211_F_SHSLOT ? 
"short" : "long", usec); /* Wake up the hardware first before updating the slot time */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ath_hal_setslottime(ah, usec); ath_power_restore_power_state(sc); sc->sc_updateslot = OK; ATH_UNLOCK(sc); } /* * Callback from the 802.11 layer to update the * slot time based on the current setting. */ static void ath_updateslot(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; /* * When not coordinating the BSS, change the hardware * immediately. For other operation we defer the change * until beacon updates have propagated to the stations. * * XXX sc_updateslot isn't changed behind a lock? */ if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) sc->sc_updateslot = UPDATE; else ath_setslottime(sc); } /* * Append the contents of src to dst; both queues * are assumed to be locked. */ void ath_txqmove(struct ath_txq *dst, struct ath_txq *src) { ATH_TXQ_LOCK_ASSERT(src); ATH_TXQ_LOCK_ASSERT(dst); TAILQ_CONCAT(&dst->axq_q, &src->axq_q, bf_list); dst->axq_link = src->axq_link; src->axq_link = NULL; dst->axq_depth += src->axq_depth; dst->axq_aggr_depth += src->axq_aggr_depth; src->axq_depth = 0; src->axq_aggr_depth = 0; } /* * Reset the hardware, with no loss. * * This can't be used for a general case reset. */ static void ath_reset_proc(void *arg, int pending) { struct ath_softc *sc = arg; #if 0 device_printf(sc->sc_dev, "%s: resetting\n", __func__); #endif ath_reset(sc, ATH_RESET_NOLOSS); } /* * Reset the hardware after detecting beacons have stopped. */ static void ath_bstuck_proc(void *arg, int pending) { struct ath_softc *sc = arg; uint32_t hangs = 0; if (ath_hal_gethangstate(sc->sc_ah, 0xff, &hangs) && hangs != 0) device_printf(sc->sc_dev, "bb hang detected (0x%x)\n", hangs); #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_STUCK_BEACON)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_STUCK_BEACON, 0, NULL); #endif device_printf(sc->sc_dev, "stuck beacon; resetting (bmiss count %u)\n", sc->sc_bmisscount); sc->sc_stats.ast_bstuck++; /* * This assumes that there's no simultaneous channel mode change * occurring. */ ath_reset(sc, ATH_RESET_NOLOSS); } static int ath_desc_alloc(struct ath_softc *sc) { int error; error = ath_descdma_setup(sc, &sc->sc_txdma, &sc->sc_txbuf, "tx", sc->sc_tx_desclen, ath_txbuf, ATH_MAX_SCATTER); if (error != 0) { return error; } sc->sc_txbuf_cnt = ath_txbuf; error = ath_descdma_setup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt, "tx_mgmt", sc->sc_tx_desclen, ath_txbuf_mgmt, ATH_TXDESC); if (error != 0) { ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); return error; } /* * XXX mark txbuf_mgmt frames with ATH_BUF_MGMT, so the * flag doesn't have to be set in ath_getbuf_locked(). 
*/ error = ath_descdma_setup(sc, &sc->sc_bdma, &sc->sc_bbuf, "beacon", sc->sc_tx_desclen, ATH_BCBUF, 1); if (error != 0) { ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); ath_descdma_cleanup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt); return error; } return 0; } static void ath_desc_free(struct ath_softc *sc) { if (sc->sc_bdma.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_bdma, &sc->sc_bbuf); if (sc->sc_txdma.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); if (sc->sc_txdma_mgmt.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt); } static struct ieee80211_node * ath_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; const size_t space = sizeof(struct ath_node) + sc->sc_rc->arc_space; struct ath_node *an; an = malloc(space, M_80211_NODE, M_NOWAIT|M_ZERO); if (an == NULL) { /* XXX stat+msg */ return NULL; } ath_rate_node_init(sc, an); /* Setup the mutex - there's no associd yet so set the name to NULL */ snprintf(an->an_name, sizeof(an->an_name), "%s: node %p", device_get_nameunit(sc->sc_dev), an); mtx_init(&an->an_mtx, an->an_name, NULL, MTX_DEF); /* XXX setup ath_tid */ ath_tx_tid_init(sc, an); DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, mac, ":", an); return &an->an_node; } static void ath_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, ni->ni_macaddr, ":", ATH_NODE(ni)); /* Cleanup ath_tid, free unused bufs, unlink bufs in TXQ */ ath_tx_node_flush(sc, ATH_NODE(ni)); ath_rate_node_cleanup(sc, ATH_NODE(ni)); sc->sc_node_cleanup(ni); } static void ath_node_free(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, ni->ni_macaddr, ":", ATH_NODE(ni)); mtx_destroy(&ATH_NODE(ni)->an_mtx); sc->sc_node_free(ni); } static void ath_node_getsignal(const struct ieee80211_node *ni, int8_t *rssi, int8_t *noise) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; *rssi = ic->ic_node_getrssi(ni); if (ni->ni_chan != IEEE80211_CHAN_ANYC) *noise = ath_hal_getchannoise(ah, ni->ni_chan); else *noise = -95; /* nominally correct */ } /* * Set the default antenna. */ void ath_setdefantenna(struct ath_softc *sc, u_int antenna) { struct ath_hal *ah = sc->sc_ah; /* XXX block beacon interrupts */ ath_hal_setdefantenna(ah, antenna); if (sc->sc_defant != antenna) sc->sc_stats.ast_ant_defswitch++; sc->sc_defant = antenna; sc->sc_rxotherant = 0; } static void ath_txq_init(struct ath_softc *sc, struct ath_txq *txq, int qnum) { txq->axq_qnum = qnum; txq->axq_ac = 0; txq->axq_depth = 0; txq->axq_aggr_depth = 0; txq->axq_intrcnt = 0; txq->axq_link = NULL; txq->axq_softc = sc; TAILQ_INIT(&txq->axq_q); TAILQ_INIT(&txq->axq_tidq); TAILQ_INIT(&txq->fifo.axq_q); ATH_TXQ_LOCK_INIT(sc, txq); } /* * Setup a h/w transmit queue. */ static struct ath_txq * ath_txq_setup(struct ath_softc *sc, int qtype, int subtype) { struct ath_hal *ah = sc->sc_ah; HAL_TXQ_INFO qi; int qnum; memset(&qi, 0, sizeof(qi)); qi.tqi_subtype = subtype; qi.tqi_aifs = HAL_TXQ_USEDEFAULT; qi.tqi_cwmin = HAL_TXQ_USEDEFAULT; qi.tqi_cwmax = HAL_TXQ_USEDEFAULT; /* * Enable interrupts only for EOL and DESC conditions. 
* We mark tx descriptors to receive a DESC interrupt * when a tx queue gets deep; otherwise waiting for the * EOL to reap descriptors. Note that this is done to * reduce interrupt load and this only defers reaping * descriptors, never transmitting frames. Aside from * reducing interrupts this also permits more concurrency. * The only potential downside is if the tx queue backs * up in which case the top half of the kernel may back up * due to a lack of tx descriptors. */ if (sc->sc_isedma) qi.tqi_qflags = HAL_TXQ_TXEOLINT_ENABLE | HAL_TXQ_TXOKINT_ENABLE; else qi.tqi_qflags = HAL_TXQ_TXEOLINT_ENABLE | HAL_TXQ_TXDESCINT_ENABLE; qnum = ath_hal_setuptxqueue(ah, qtype, &qi); if (qnum == -1) { /* * NB: don't print a message, this happens * normally on parts with too few tx queues */ return NULL; } if (qnum >= nitems(sc->sc_txq)) { device_printf(sc->sc_dev, "hal qnum %u out of range, max %zu!\n", qnum, nitems(sc->sc_txq)); ath_hal_releasetxqueue(ah, qnum); return NULL; } if (!ATH_TXQ_SETUP(sc, qnum)) { ath_txq_init(sc, &sc->sc_txq[qnum], qnum); sc->sc_txqsetup |= 1<<qnum; } return &sc->sc_txq[qnum]; } /* * Setup a hardware data transmit queue for the specified * access control. The hal may not support all requested * queues in which case it will return a reference to a * previously setup queue. We record the mapping from ac's * to h/w queues for use by ath_tx_start and also track * the set of h/w queues being used to optimize work in the * transmit interrupt handler and related routines. */ static int ath_tx_setup(struct ath_softc *sc, int ac, int haltype) { struct ath_txq *txq; if (ac >= nitems(sc->sc_ac2q)) { device_printf(sc->sc_dev, "AC %u out of range, max %zu!\n", ac, nitems(sc->sc_ac2q)); return 0; } txq = ath_txq_setup(sc, HAL_TX_QUEUE_DATA, haltype); if (txq != NULL) { txq->axq_ac = ac; sc->sc_ac2q[ac] = txq; return 1; } else return 0; } /* * Update WME parameters for a transmit queue. */ static int ath_txq_update(struct ath_softc *sc, int ac) { #define ATH_EXPONENT_TO_VALUE(v) ((1<<v)-1) struct ieee80211com *ic = &sc->sc_ic; struct ath_txq *txq = sc->sc_ac2q[ac]; struct wmeParams *wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[ac]; struct ath_hal *ah = sc->sc_ah; HAL_TXQ_INFO qi; ath_hal_gettxqueueprops(ah, txq->axq_qnum, &qi); #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) { /* * AIFS is zero so there's no pre-transmit wait. The * burst time defines the slot duration and is configured * through net80211. The QCU is setup to not do post-xmit * back off, lockout all lower-priority QCU's, and fire * off the DMA beacon alert timer which is setup based * on the slot configuration. */ qi.tqi_qflags = HAL_TXQ_TXOKINT_ENABLE | HAL_TXQ_TXERRINT_ENABLE | HAL_TXQ_TXURNINT_ENABLE | HAL_TXQ_TXEOLINT_ENABLE | HAL_TXQ_DBA_GATED | HAL_TXQ_BACKOFF_DISABLE | HAL_TXQ_ARB_LOCKOUT_GLOBAL ; qi.tqi_aifs = 0; /* XXX +dbaprep? */ qi.tqi_readyTime = sc->sc_tdmaslotlen; qi.tqi_burstTime = qi.tqi_readyTime; } else { #endif /* * XXX shouldn't this just use the default flags * used in the previous queue setup?
*/ qi.tqi_qflags = HAL_TXQ_TXOKINT_ENABLE | HAL_TXQ_TXERRINT_ENABLE | HAL_TXQ_TXDESCINT_ENABLE | HAL_TXQ_TXURNINT_ENABLE | HAL_TXQ_TXEOLINT_ENABLE ; qi.tqi_aifs = wmep->wmep_aifsn; qi.tqi_cwmin = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); qi.tqi_cwmax = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); qi.tqi_readyTime = 0; qi.tqi_burstTime = IEEE80211_TXOP_TO_US(wmep->wmep_txopLimit); #ifdef IEEE80211_SUPPORT_TDMA } #endif DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%u qflags 0x%x aifs %u cwmin %u cwmax %u burstTime %u\n", __func__, txq->axq_qnum, qi.tqi_qflags, qi.tqi_aifs, qi.tqi_cwmin, qi.tqi_cwmax, qi.tqi_burstTime); if (!ath_hal_settxqueueprops(ah, txq->axq_qnum, &qi)) { device_printf(sc->sc_dev, "unable to update hardware queue " "parameters for %s traffic!\n", ieee80211_wme_acnames[ac]); return 0; } else { ath_hal_resettxqueue(ah, txq->axq_qnum); /* push to h/w */ return 1; } #undef ATH_EXPONENT_TO_VALUE } /* * Callback from the 802.11 layer to update WME parameters. */ int ath_wme_update(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; return !ath_txq_update(sc, WME_AC_BE) || !ath_txq_update(sc, WME_AC_BK) || !ath_txq_update(sc, WME_AC_VI) || !ath_txq_update(sc, WME_AC_VO) ? EIO : 0; } /* * Reclaim resources for a setup queue. */ static void ath_tx_cleanupq(struct ath_softc *sc, struct ath_txq *txq) { ath_hal_releasetxqueue(sc->sc_ah, txq->axq_qnum); sc->sc_txqsetup &= ~(1<<txq->axq_qnum); ATH_TXQ_LOCK_DESTROY(txq); } /* * Reclaim all tx queue resources. */ static void ath_tx_cleanup(struct ath_softc *sc) { int i; ATH_TXBUF_LOCK_DESTROY(sc); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) if (ATH_TXQ_SETUP(sc, i)) ath_tx_cleanupq(sc, &sc->sc_txq[i]); } /* * Return h/w rate index for an IEEE rate (w/o basic rate bit) * using the current rates in sc_rixmap. */ int ath_tx_findrix(const struct ath_softc *sc, uint8_t rate) { int rix = sc->sc_rixmap[rate]; /* NB: return lowest rix for invalid rate */ return (rix == 0xff ? 0 : rix); } static void ath_tx_update_stats(struct ath_softc *sc, struct ath_tx_status *ts, struct ath_buf *bf) { struct ieee80211_node *ni = bf->bf_node; struct ieee80211com *ic = &sc->sc_ic; int sr, lr, pri; if (ts->ts_status == 0) { u_int8_t txant = ts->ts_antenna; sc->sc_stats.ast_ant_tx[txant]++; sc->sc_ant_tx[txant]++; if (ts->ts_finaltsi != 0) sc->sc_stats.ast_tx_altrate++; pri = M_WME_GETAC(bf->bf_m); if (pri >= WME_AC_VO) ic->ic_wme.wme_hipri_traffic++; if ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) ni->ni_inact = ni->ni_inact_reload; } else { if (ts->ts_status & HAL_TXERR_XRETRY) sc->sc_stats.ast_tx_xretries++; if (ts->ts_status & HAL_TXERR_FIFO) sc->sc_stats.ast_tx_fifoerr++; if (ts->ts_status & HAL_TXERR_FILT) sc->sc_stats.ast_tx_filtered++; if (ts->ts_status & HAL_TXERR_XTXOP) sc->sc_stats.ast_tx_xtxop++; if (ts->ts_status & HAL_TXERR_TIMER_EXPIRED) sc->sc_stats.ast_tx_timerexpired++; if (bf->bf_m->m_flags & M_FF) sc->sc_stats.ast_ff_txerr++; } /* XXX when is this valid? */ if (ts->ts_flags & HAL_TX_DESC_CFG_ERR) sc->sc_stats.ast_tx_desccfgerr++; /* * This can be valid for successful frame transmission! * If there's a TX FIFO underrun during aggregate transmission, * the MAC will pad the rest of the aggregate with delimiters. * If a BA is returned, the frame is marked as "OK" and it's up * to the TX completion code to notice which frames weren't * successfully transmitted.
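For reference, the ATH_EXPONENT_TO_VALUE() conversion used in ath_txq_update() above is just ((1 << v) - 1): WME stores contention windows as exponents and the hardware wants the window value itself. A tiny worked example (names here are illustrative, not the driver's):

#include <assert.h>

#define SKETCH_EXP_TO_VALUE(v)	((1 << (v)) - 1)

int
main(void)
{
	assert(SKETCH_EXP_TO_VALUE(4) == 15);		/* exponent 4  -> cw 15 */
	assert(SKETCH_EXP_TO_VALUE(10) == 1023);	/* exponent 10 -> cw 1023 */
	return (0);
}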
*/ if (ts->ts_flags & HAL_TX_DATA_UNDERRUN) sc->sc_stats.ast_tx_data_underrun++; if (ts->ts_flags & HAL_TX_DELIM_UNDERRUN) sc->sc_stats.ast_tx_delim_underrun++; sr = ts->ts_shortretry; lr = ts->ts_longretry; sc->sc_stats.ast_tx_shortretry += sr; sc->sc_stats.ast_tx_longretry += lr; } /* * The default completion. If fail is 1, this means * "please don't retry the frame, and just return -1 status * to the net80211 stack". */ void ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail) { struct ath_tx_status *ts = &bf->bf_status.ds_txstat; int st; if (fail == 1) st = -1; else st = ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) ? ts->ts_status : HAL_TXERR_XRETRY; #if 0 if (bf->bf_state.bfs_dobaw) device_printf(sc->sc_dev, "%s: bf %p: seqno %d: dobaw should've been cleared!\n", __func__, bf, SEQNO(bf->bf_state.bfs_seqno)); #endif if (bf->bf_next != NULL) device_printf(sc->sc_dev, "%s: bf %p: seqno %d: bf_next not NULL!\n", __func__, bf, SEQNO(bf->bf_state.bfs_seqno)); /* * Check if the node software queue is empty; if so * then clear the TIM. * * This needs to be done before the buffer is freed as * otherwise the node reference will have been released * and the node may not actually exist any longer. * * XXX I don't like this belonging here, but it's cleaner * to do it here right now than in all the other places * where ath_tx_default_comp() is called. * * XXX TODO: during drain, ensure that the callback is * being called so we get a chance to update the TIM. */ if (bf->bf_node) { ATH_TX_LOCK(sc); ath_tx_update_tim(sc, bf->bf_node, 0); ATH_TX_UNLOCK(sc); } /* * Do any tx complete callback. Note this must * be done before releasing the node reference. * This will free the mbuf, release the net80211 * node and recycle the ath_buf. */ ath_tx_freebuf(sc, bf, st); } /* * Update rate control with the given completion status. */ void ath_tx_update_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_rc_series *rc, struct ath_tx_status *ts, int frmlen, int nframes, int nbad) { struct ath_node *an; /* Only for unicast frames */ if (ni == NULL) return; an = ATH_NODE(ni); ATH_NODE_UNLOCK_ASSERT(an); if ((ts->ts_status & HAL_TXERR_FILT) == 0) { ATH_NODE_LOCK(an); ath_rate_tx_complete(sc, an, rc, ts, frmlen, nframes, nbad); ATH_NODE_UNLOCK(an); } } /* * Process the completion of the given buffer. * * This calls the rate control update and then the buffer completion. * This will either free the buffer or requeue it. In any case, the * bf pointer should be treated as invalid after this function is called. */ void ath_tx_process_buf_completion(struct ath_softc *sc, struct ath_txq *txq, struct ath_tx_status *ts, struct ath_buf *bf) { struct ieee80211_node *ni = bf->bf_node; ATH_TX_UNLOCK_ASSERT(sc); ATH_TXQ_UNLOCK_ASSERT(txq); /* If unicast frame, update general statistics */ if (ni != NULL) { /* update statistics */ ath_tx_update_stats(sc, ts, bf); } /* * Call the completion handler. * The completion handler is responsible for * calling the rate control code. * * Frames with no completion handler get the * rate control code called here. */ if (bf->bf_comp == NULL) { if ((ts->ts_status & HAL_TXERR_FILT) == 0 && (bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) { /* * XXX assume this isn't an aggregate * frame. */ ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc, ts, bf->bf_state.bfs_pktlen, 1, (ts->ts_status == 0 ? 0 : 1)); } ath_tx_default_comp(sc, bf, 0); } else bf->bf_comp(sc, bf, 0); } /* * Process completed xmit descriptors from the specified queue.
* Kick the packet scheduler if needed. This can occur from this * particular task. */ static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; struct ath_desc *ds; struct ath_tx_status *ts; struct ieee80211_node *ni; #ifdef IEEE80211_SUPPORT_SUPERG struct ieee80211com *ic = &sc->sc_ic; #endif /* IEEE80211_SUPPORT_SUPERG */ int nacked; HAL_STATUS status; DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: tx queue %u head %p link %p\n", __func__, txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(sc->sc_ah, txq->axq_qnum), txq->axq_link); ATH_KTR(sc, ATH_KTR_TXCOMP, 4, "ath_tx_processq: txq=%u head %p link %p depth %p", txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(sc->sc_ah, txq->axq_qnum), txq->axq_link, txq->axq_depth); nacked = 0; for (;;) { ATH_TXQ_LOCK(txq); txq->axq_intrcnt = 0; /* reset periodic desc intr count */ bf = TAILQ_FIRST(&txq->axq_q); if (bf == NULL) { ATH_TXQ_UNLOCK(txq); break; } ds = bf->bf_lastds; /* XXX must be setup correctly! */ ts = &bf->bf_status.ds_txstat; status = ath_hal_txprocdesc(ah, ds, ts); #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_XMIT_DESC) ath_printtxbuf(sc, bf, txq->axq_qnum, 0, status == HAL_OK); else if ((sc->sc_debug & ATH_DEBUG_RESET) && (dosched == 0)) ath_printtxbuf(sc, bf, txq->axq_qnum, 0, status == HAL_OK); #endif #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) { if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS, sc->sc_tx_statuslen, (char *) ds); } #endif if (status == HAL_EINPROGRESS) { ATH_KTR(sc, ATH_KTR_TXCOMP, 3, "ath_tx_processq: txq=%u, bf=%p ds=%p, HAL_EINPROGRESS", txq->axq_qnum, bf, ds); ATH_TXQ_UNLOCK(txq); break; } ATH_TXQ_REMOVE(txq, bf, bf_list); /* * Sanity check. */ if (txq->axq_qnum != bf->bf_state.bfs_tx_queue) { device_printf(sc->sc_dev, "%s: TXQ=%d: bf=%p, bfs_tx_queue=%d\n", __func__, txq->axq_qnum, bf, bf->bf_state.bfs_tx_queue); } if (txq->axq_qnum != bf->bf_last->bf_state.bfs_tx_queue) { device_printf(sc->sc_dev, "%s: TXQ=%d: bf_last=%p, bfs_tx_queue=%d\n", __func__, txq->axq_qnum, bf->bf_last, bf->bf_last->bf_state.bfs_tx_queue); } #if 0 if (txq->axq_depth > 0) { /* * More frames follow. Mark the buffer busy * so it's not re-used while the hardware may * still re-read the link field in the descriptor. * * Use the last buffer in an aggregate as that * is where the hardware may be - intermediate * descriptors won't be "busy". */ bf->bf_last->bf_flags |= ATH_BUF_BUSY; } else txq->axq_link = NULL; #else bf->bf_last->bf_flags |= ATH_BUF_BUSY; #endif if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth--; ni = bf->bf_node; ATH_KTR(sc, ATH_KTR_TXCOMP, 5, "ath_tx_processq: txq=%u, bf=%p, ds=%p, ni=%p, ts_status=0x%08x", txq->axq_qnum, bf, ds, ni, ts->ts_status); /* * If unicast frame was ack'd update RSSI, * including the last rx time used to * workaround phantom bmiss interrupts. */ if (ni != NULL && ts->ts_status == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) { nacked++; sc->sc_stats.ast_tx_rssi = ts->ts_rssi; ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi, ts->ts_rssi); } ATH_TXQ_UNLOCK(txq); /* * Update statistics and call completion */ ath_tx_process_buf_completion(sc, txq, ts, bf); /* XXX at this point, bf and ni may be totally invalid */ } #ifdef IEEE80211_SUPPORT_SUPERG /* * Flush fast-frame staging queue when traffic slows. 
*/ if (txq->axq_depth <= 1) ieee80211_ff_flush(ic, txq->axq_ac); #endif /* Kick the software TXQ scheduler */ if (dosched) { ATH_TX_LOCK(sc); ath_txq_sched(sc, txq); ATH_TX_UNLOCK(sc); } ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_processq: txq=%u: done", txq->axq_qnum); return nacked; } #define TXQACTIVE(t, q) ( (t) & (1 << (q))) /* * Deferred processing of transmit interrupt; special-cased * for a single hardware transmit queue (e.g. 5210 and 5211). */ static void ath_tx_proc_q0(void *arg, int npending) { struct ath_softc *sc = arg; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc_q0: txqs=0x%08x", txqs); if (TXQACTIVE(txqs, 0) && ath_tx_processq(sc, &sc->sc_txq[0], 1)) /* XXX why is lastrx updated in tx code? */ sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); if (TXQACTIVE(txqs, sc->sc_cabq->axq_qnum)) ath_tx_processq(sc, sc->sc_cabq, 1); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } /* * Deferred processing of transmit interrupt; special-cased * for four hardware queues, 0-3 (e.g. 5212 w/ WME support). */ static void ath_tx_proc_q0123(void *arg, int npending) { struct ath_softc *sc = arg; int nacked; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc_q0123: txqs=0x%08x", txqs); /* * Process each active queue. */ nacked = 0; if (TXQACTIVE(txqs, 0)) nacked += ath_tx_processq(sc, &sc->sc_txq[0], 1); if (TXQACTIVE(txqs, 1)) nacked += ath_tx_processq(sc, &sc->sc_txq[1], 1); if (TXQACTIVE(txqs, 2)) nacked += ath_tx_processq(sc, &sc->sc_txq[2], 1); if (TXQACTIVE(txqs, 3)) nacked += ath_tx_processq(sc, &sc->sc_txq[3], 1); if (TXQACTIVE(txqs, sc->sc_cabq->axq_qnum)) ath_tx_processq(sc, sc->sc_cabq, 1); if (nacked) sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } /* * Deferred processing of transmit interrupt. */ static void ath_tx_proc(void *arg, int npending) { struct ath_softc *sc = arg; int i, nacked; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc: txqs=0x%08x", txqs); /* * Process each active queue. */ nacked = 0; for (i = 0; i < HAL_NUM_TX_QUEUES; i++) if (ATH_TXQ_SETUP(sc, i) && TXQACTIVE(txqs, i)) nacked += ath_tx_processq(sc, &sc->sc_txq[i], 1); if (nacked) sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } #undef TXQACTIVE /* * Deferred processing of TXQ rescheduling. */ static void ath_txq_sched_tasklet(void *arg, int npending) { struct ath_softc *sc = arg; int i; /* XXX is skipping ok? 
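The common shape of the three ath_tx_proc* handlers above is: latch and clear the active-queue bitmask under the PCU lock, then process whichever bits were set. A minimal sketch of that latch, with hypothetical names and a pthread mutex standing in for ATH_PCU_LOCK:

#include <stdint.h>
#include <pthread.h>

#define SKETCH_TXQACTIVE(t, q)	((t) & (1u << (q)))

struct sketch_sc {
	pthread_mutex_t pcu_mtx;
	uint32_t txq_active;	/* bits set by the interrupt path */
};

static uint32_t
sketch_latch_txqs(struct sketch_sc *sc)
{
	uint32_t txqs;

	pthread_mutex_lock(&sc->pcu_mtx);
	txqs = sc->txq_active;		/* snapshot what the ISR flagged */
	sc->txq_active &= ~txqs;	/* clear; new IRQs re-arm the task */
	pthread_mutex_unlock(&sc->pcu_mtx);
	return (txqs);
}

/* The caller then processes each queue q with SKETCH_TXQACTIVE(txqs, q) set. */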
*/ ATH_PCU_LOCK(sc); #if 0 if (sc->sc_inreset_cnt > 0) { device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; skipping\n", __func__); ATH_PCU_UNLOCK(sc); return; } #endif sc->sc_txproc_cnt++; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_TX_LOCK(sc); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ath_txq_sched(sc, &sc->sc_txq[i]); } } ATH_TX_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); } void ath_returnbuf_tail(struct ath_softc *sc, struct ath_buf *bf) { ATH_TXBUF_LOCK_ASSERT(sc); if (bf->bf_flags & ATH_BUF_MGMT) TAILQ_INSERT_TAIL(&sc->sc_txbuf_mgmt, bf, bf_list); else { TAILQ_INSERT_TAIL(&sc->sc_txbuf, bf, bf_list); sc->sc_txbuf_cnt++; if (sc->sc_txbuf_cnt > ath_txbuf) { device_printf(sc->sc_dev, "%s: sc_txbuf_cnt > %d?\n", __func__, ath_txbuf); sc->sc_txbuf_cnt = ath_txbuf; } } } void ath_returnbuf_head(struct ath_softc *sc, struct ath_buf *bf) { ATH_TXBUF_LOCK_ASSERT(sc); if (bf->bf_flags & ATH_BUF_MGMT) TAILQ_INSERT_HEAD(&sc->sc_txbuf_mgmt, bf, bf_list); else { TAILQ_INSERT_HEAD(&sc->sc_txbuf, bf, bf_list); sc->sc_txbuf_cnt++; if (sc->sc_txbuf_cnt > ATH_TXBUF) { device_printf(sc->sc_dev, "%s: sc_txbuf_cnt > %d?\n", __func__, ATH_TXBUF); sc->sc_txbuf_cnt = ATH_TXBUF; } } } /* * Free the holding buffer if it exists */ void ath_txq_freeholdingbuf(struct ath_softc *sc, struct ath_txq *txq) { ATH_TXBUF_UNLOCK_ASSERT(sc); ATH_TXQ_LOCK_ASSERT(txq); if (txq->axq_holdingbf == NULL) return; txq->axq_holdingbf->bf_flags &= ~ATH_BUF_BUSY; ATH_TXBUF_LOCK(sc); ath_returnbuf_tail(sc, txq->axq_holdingbf); ATH_TXBUF_UNLOCK(sc); txq->axq_holdingbf = NULL; } /* * Add this buffer to the holding queue, freeing the previous * one if it exists. */ static void ath_txq_addholdingbuf(struct ath_softc *sc, struct ath_buf *bf) { struct ath_txq *txq; txq = &sc->sc_txq[bf->bf_state.bfs_tx_queue]; ATH_TXBUF_UNLOCK_ASSERT(sc); ATH_TXQ_LOCK_ASSERT(txq); /* XXX assert ATH_BUF_BUSY is set */ /* XXX assert the tx queue is under the max number */ if (bf->bf_state.bfs_tx_queue > HAL_NUM_TX_QUEUES) { device_printf(sc->sc_dev, "%s: bf=%p: invalid tx queue (%d)\n", __func__, bf, bf->bf_state.bfs_tx_queue); bf->bf_flags &= ~ATH_BUF_BUSY; ath_returnbuf_tail(sc, bf); return; } ath_txq_freeholdingbuf(sc, txq); txq->axq_holdingbf = bf; } /* * Return a buffer to the pool and update the 'busy' flag on the * previous 'tail' entry. * * This _must_ only be called when the buffer is involved in a completed * TX. The logic is that if it was part of an active TX, the previous * buffer on the list is now not involved in a halted TX DMA queue, waiting * for restart (eg for TDMA.) * * The caller must free the mbuf and recycle the node reference. * * XXX This method of handling busy / holding buffers is insanely stupid. * It requires bf_state.bfs_tx_queue to be correctly assigned. It would * be much nicer if buffers in the processq() methods would instead be * always completed there (pushed onto a txq or ath_bufhead) so we knew * exactly what hardware queue they came from in the first place. */ void ath_freebuf(struct ath_softc *sc, struct ath_buf *bf) { struct ath_txq *txq; txq = &sc->sc_txq[bf->bf_state.bfs_tx_queue]; KASSERT((bf->bf_node == NULL), ("%s: bf->bf_node != NULL\n", __func__)); KASSERT((bf->bf_m == NULL), ("%s: bf->bf_m != NULL\n", __func__)); /* * If this buffer is busy, push it onto the holding queue. 
*/ if (bf->bf_flags & ATH_BUF_BUSY) { ATH_TXQ_LOCK(txq); ath_txq_addholdingbuf(sc, bf); ATH_TXQ_UNLOCK(txq); return; } /* * Not a busy buffer, so free normally */ ATH_TXBUF_LOCK(sc); ath_returnbuf_tail(sc, bf); ATH_TXBUF_UNLOCK(sc); } /* * This is currently used by ath_tx_draintxq() and * ath_tx_tid_free_pkts(). * * It recycles a single ath_buf. */ void ath_tx_freebuf(struct ath_softc *sc, struct ath_buf *bf, int status) { struct ieee80211_node *ni = bf->bf_node; struct mbuf *m0 = bf->bf_m; /* * Make sure that we only sync/unload if there's an mbuf. * If not (eg we cloned a buffer), the unload will have already * occurred. */ if (bf->bf_m != NULL) { bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); } bf->bf_node = NULL; bf->bf_m = NULL; /* Free the buffer, it's not needed any longer */ ath_freebuf(sc, bf); /* Pass the buffer back to net80211 - completing it */ ieee80211_tx_complete(ni, m0, status); } static struct ath_buf * ath_tx_draintxq_get_one(struct ath_softc *sc, struct ath_txq *txq) { struct ath_buf *bf; ATH_TXQ_LOCK_ASSERT(txq); /* * Drain the FIFO queue first, then if it's * empty, move to the normal frame queue. */ bf = TAILQ_FIRST(&txq->fifo.axq_q); if (bf != NULL) { /* * Is it the last buffer in this set? * Decrement the FIFO counter. */ if (bf->bf_flags & ATH_BUF_FIFOEND) { if (txq->axq_fifo_depth == 0) { device_printf(sc->sc_dev, "%s: Q%d: fifo_depth=0, fifo.axq_depth=%d?\n", __func__, txq->axq_qnum, txq->fifo.axq_depth); } else txq->axq_fifo_depth--; } ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list); return (bf); } /* * Debugging! */ if (txq->axq_fifo_depth != 0 || txq->fifo.axq_depth != 0) { device_printf(sc->sc_dev, "%s: Q%d: fifo_depth=%d, fifo.axq_depth=%d\n", __func__, txq->axq_qnum, txq->axq_fifo_depth, txq->fifo.axq_depth); } /* * Now drain the pending queue. */ bf = TAILQ_FIRST(&txq->axq_q); if (bf == NULL) { txq->axq_link = NULL; return (NULL); } ATH_TXQ_REMOVE(txq, bf, bf_list); return (bf); } void ath_tx_draintxq(struct ath_softc *sc, struct ath_txq *txq) { #ifdef ATH_DEBUG struct ath_hal *ah = sc->sc_ah; #endif struct ath_buf *bf; u_int ix; /* * NB: this assumes output has been stopped and * we do not need to block ath_tx_proc */ for (ix = 0;; ix++) { ATH_TXQ_LOCK(txq); bf = ath_tx_draintxq_get_one(sc, txq); if (bf == NULL) { ATH_TXQ_UNLOCK(txq); break; } if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth--; #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) { struct ieee80211com *ic = &sc->sc_ic; int status = 0; /* * EDMA operation has a TX completion FIFO * separate from the TX descriptor, so this * method of checking the "completion" status * is wrong. */ if (! sc->sc_isedma) { status = (ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); } ath_printtxbuf(sc, bf, txq->axq_qnum, ix, status); ieee80211_dump_pkt(ic, mtod(bf->bf_m, const uint8_t *), bf->bf_m->m_len, 0, -1); } #endif /* ATH_DEBUG */ /* * Since we're now doing magic in the completion * functions, we -must- call it for aggregation * destinations or BAW tracking will get upset. */ /* * Clear ATH_BUF_BUSY; the completion handler * will free the buffer. */ ATH_TXQ_UNLOCK(txq); bf->bf_flags &= ~ATH_BUF_BUSY; if (bf->bf_comp) bf->bf_comp(sc, bf, 1); else ath_tx_default_comp(sc, bf, 1); } /* * Free the holding buffer if it exists */ ATH_TXQ_LOCK(txq); ath_txq_freeholdingbuf(sc, txq); ATH_TXQ_UNLOCK(txq); /* * Drain software queued frames which are on * active TIDs. 
*/ ath_tx_txq_drain(sc, txq); } static void ath_tx_stopdma(struct ath_softc *sc, struct ath_txq *txq) { struct ath_hal *ah = sc->sc_ah; ATH_TXQ_LOCK_ASSERT(txq); DPRINTF(sc, ATH_DEBUG_RESET, "%s: tx queue [%u] %p, active=%d, hwpending=%d, flags 0x%08x, " "link %p, holdingbf=%p\n", __func__, txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(ah, txq->axq_qnum), (int) (!! ath_hal_txqenabled(ah, txq->axq_qnum)), (int) ath_hal_numtxpending(ah, txq->axq_qnum), txq->axq_flags, txq->axq_link, txq->axq_holdingbf); (void) ath_hal_stoptxdma(ah, txq->axq_qnum); /* We've stopped TX DMA, so mark this as stopped. */ txq->axq_flags &= ~ATH_TXQ_PUTRUNNING; #ifdef ATH_DEBUG if ((sc->sc_debug & ATH_DEBUG_RESET) && (txq->axq_holdingbf != NULL)) { ath_printtxbuf(sc, txq->axq_holdingbf, txq->axq_qnum, 0, 0); } #endif } int ath_stoptxdma(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; int i; /* XXX return value */ if (sc->sc_invalid) return 0; if (!sc->sc_invalid) { /* don't touch the hardware if marked invalid */ DPRINTF(sc, ATH_DEBUG_RESET, "%s: tx queue [%u] %p, link %p\n", __func__, sc->sc_bhalq, (caddr_t)(uintptr_t) ath_hal_gettxbuf(ah, sc->sc_bhalq), NULL); /* stop the beacon queue */ (void) ath_hal_stoptxdma(ah, sc->sc_bhalq); /* Stop the data queues */ for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); ath_tx_stopdma(sc, &sc->sc_txq[i]); ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } } } return 1; } #ifdef ATH_DEBUG void ath_tx_dump(struct ath_softc *sc, struct ath_txq *txq) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; int i = 0; if (! (sc->sc_debug & ATH_DEBUG_RESET)) return; device_printf(sc->sc_dev, "%s: Q%d: begin\n", __func__, txq->axq_qnum); TAILQ_FOREACH(bf, &txq->axq_q, bf_list) { ath_printtxbuf(sc, bf, txq->axq_qnum, i, ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); i++; } device_printf(sc->sc_dev, "%s: Q%d: end\n", __func__, txq->axq_qnum); } #endif /* ATH_DEBUG */ /* * Drain the transmit queues and reclaim resources. */ void ath_legacy_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf_last; int i; (void) ath_stoptxdma(sc); /* * Dump the queue contents */ for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { /* * XXX TODO: should we just handle the completed TX frames * here, whether or not the reset is a full one or not? */ if (ATH_TXQ_SETUP(sc, i)) { #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) ath_tx_dump(sc, &sc->sc_txq[i]); #endif /* ATH_DEBUG */ if (reset_type == ATH_RESET_NOLOSS) { ath_tx_processq(sc, &sc->sc_txq[i], 0); ATH_TXQ_LOCK(&sc->sc_txq[i]); /* * Free the holding buffer; DMA is now * stopped. */ ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]); /* * Setup the link pointer to be the * _last_ buffer/descriptor in the list. * If there's nothing in the list, set it * to NULL. 
*/ bf_last = ATH_TXQ_LAST(&sc->sc_txq[i], axq_q_s); if (bf_last != NULL) { ath_hal_gettxdesclinkptr(ah, bf_last->bf_lastds, &sc->sc_txq[i].axq_link); } else { sc->sc_txq[i].axq_link = NULL; } ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } else ath_tx_draintxq(sc, &sc->sc_txq[i]); } } #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) { struct ath_buf *bf = TAILQ_FIRST(&sc->sc_bbuf); if (bf != NULL && bf->bf_m != NULL) { ath_printtxbuf(sc, bf, sc->sc_bhalq, 0, ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); ieee80211_dump_pkt(&sc->sc_ic, mtod(bf->bf_m, const uint8_t *), bf->bf_m->m_len, 0, -1); } } #endif /* ATH_DEBUG */ sc->sc_wd_timer = 0; } /* * Update internal state after a channel change. */ static void ath_chan_change(struct ath_softc *sc, struct ieee80211_channel *chan) { enum ieee80211_phymode mode; /* * Change channels and update the h/w rate map * if we're switching; e.g. 11a to 11b/g. */ mode = ieee80211_chan2mode(chan); if (mode != sc->sc_curmode) ath_setcurmode(sc, mode); sc->sc_curchan = chan; } /* * Set/change channels. If the channel is really being changed, * it's done by resetting the chip. To accomplish this we must * first cleanup any pending DMA, then restart stuff after a la * ath_init. */ static int ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; int ret = 0; /* Treat this as an interface reset */ ATH_PCU_UNLOCK_ASSERT(sc); ATH_UNLOCK_ASSERT(sc); /* (Try to) stop TX/RX from occurring */ taskqueue_block(sc->sc_tq); ATH_PCU_LOCK(sc); /* Disable interrupts */ ath_hal_intrset(ah, 0); /* Stop new RX/TX/interrupt completion */ if (ath_reset_grablock(sc, 1) == 0) { device_printf(sc->sc_dev, "%s: concurrent reset! Danger!\n", __func__); } /* Stop pending RX/TX completion */ ath_txrx_stop_locked(sc); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_RESET, "%s: %u (%u MHz, flags 0x%x)\n", __func__, ieee80211_chan2ieee(ic, chan), chan->ic_freq, chan->ic_flags); if (chan != sc->sc_curchan) { HAL_STATUS status; /* * To switch channels clear any pending DMA operations; * wait long enough for the RX fifo to drain, reset the * hardware at the new frequency, and then re-enable * the relevant bits of the h/w. */ #if 0 ath_hal_intrset(ah, 0); /* disable interrupts */ #endif ath_stoprecv(sc, 1); /* turn off frame recv */ /* * First, handle completed TX/RX frames. */ ath_rx_flush(sc); ath_draintxq(sc, ATH_RESET_NOLOSS); /* * Next, flush the non-scheduled frames. */ ath_draintxq(sc, ATH_RESET_FULL); /* clear pending tx frames */ ath_update_chainmasks(sc, chan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); if (!ath_hal_reset(ah, sc->sc_opmode, chan, AH_TRUE, HAL_RESET_NORMAL, &status)) { device_printf(sc->sc_dev, "%s: unable to reset " "channel %u (%u MHz, flags 0x%x), hal status %u\n", __func__, ieee80211_chan2ieee(ic, chan), chan->ic_freq, chan->ic_flags, status); ret = EIO; goto finish; } sc->sc_diversity = ath_hal_getdiversity(ah); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, chan); /* Let spectral at in case spectral is enabled */ ath_spectral_enable(sc, chan); /* * Let bluetooth coexistence at in case it's needed for this * channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips * that support it. 
*/ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); /* * Re-enable rx framework. */ if (ath_startrecv(sc) != 0) { device_printf(sc->sc_dev, "%s: unable to restart recv logic\n", __func__); ret = EIO; goto finish; } /* * Change channels and update the h/w rate map * if we're switching; e.g. 11a to 11b/g. */ ath_chan_change(sc, chan); /* * Reset clears the beacon timers; reset them * here if needed. */ if (sc->sc_beacons) { /* restart beacons */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) ath_tdma_config(sc, NULL); else #endif ath_beacon_config(sc, NULL); } /* * Re-enable interrupts. */ #if 0 ath_hal_intrset(ah, sc->sc_imask); #endif } finish: ATH_PCU_LOCK(sc); sc->sc_inreset_cnt--; /* XXX only do this if sc_inreset_cnt == 0? */ ath_hal_intrset(ah, sc->sc_imask); ATH_PCU_UNLOCK(sc); ath_txrx_start(sc); /* XXX ath_start? */ return ret; } /* * Periodically recalibrate the PHY to account * for temperature/environment changes. */ static void ath_calibrate(void *arg) { struct ath_softc *sc = arg; struct ath_hal *ah = sc->sc_ah; struct ieee80211com *ic = &sc->sc_ic; HAL_BOOL longCal, isCalDone = AH_TRUE; HAL_BOOL aniCal, shortCal = AH_FALSE; int nextcal; ATH_LOCK_ASSERT(sc); /* * Force the hardware awake for ANI work. */ ath_power_set_power_state(sc, HAL_PM_AWAKE); /* Skip trying to do this if we're in reset */ if (sc->sc_inreset_cnt) goto restart; if (ic->ic_flags & IEEE80211_F_SCAN) /* defer, off channel */ goto restart; longCal = (ticks - sc->sc_lastlongcal >= ath_longcalinterval*hz); aniCal = (ticks - sc->sc_lastani >= ath_anicalinterval*hz/1000); if (sc->sc_doresetcal) shortCal = (ticks - sc->sc_lastshortcal >= ath_shortcalinterval*hz/1000); DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: shortCal=%d; longCal=%d; aniCal=%d\n", __func__, shortCal, longCal, aniCal); if (aniCal) { sc->sc_stats.ast_ani_cal++; sc->sc_lastani = ticks; ath_hal_ani_poll(ah, sc->sc_curchan); } if (longCal) { sc->sc_stats.ast_per_cal++; sc->sc_lastlongcal = ticks; if (ath_hal_getrfgain(ah) == HAL_RFGAIN_NEED_CHANGE) { /* * Rfgain is out of bounds, reset the chip * to load new gain values. */ DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: rfgain change\n", __func__); sc->sc_stats.ast_per_rfgain++; sc->sc_resetcal = 0; sc->sc_doresetcal = AH_TRUE; taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask); callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc); ath_power_restore_power_state(sc); return; } /* * If this long cal is after an idle period, then * reset the data collection state so we start fresh. */ if (sc->sc_resetcal) { (void) ath_hal_calreset(ah, sc->sc_curchan); sc->sc_lastcalreset = ticks; sc->sc_lastshortcal = ticks; sc->sc_resetcal = 0; sc->sc_doresetcal = AH_TRUE; } } /* Only call if we're doing a short/long cal, not for ANI calibration */ if (shortCal || longCal) { isCalDone = AH_FALSE; if (ath_hal_calibrateN(ah, sc->sc_curchan, longCal, &isCalDone)) { if (longCal) { /* * Calibrate noise floor data again in case of change. */ ath_hal_process_noisefloor(ah); } } else { DPRINTF(sc, ATH_DEBUG_ANY, "%s: calibration of channel %u failed\n", __func__, sc->sc_curchan->ic_freq); sc->sc_stats.ast_per_calfail++; } if (shortCal) sc->sc_lastshortcal = ticks; } if (!isCalDone) { restart: /* * Use a shorter interval to potentially collect multiple * data samples required to complete calibration. Once * we're told the work is done we drop back to a longer * interval between requests. 
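 * (Worked example, assuming the driver's usual defaults of ath_shortcalinterval=100 ms and hz=1000: nextcal = 100*1000/1000 = 100 ticks, stretched 10x to 1000 ticks when not operating as HOSTAP.)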
We're more aggressive doing * work when operating as an AP to improve operation right * after startup. */ sc->sc_lastshortcal = ticks; nextcal = ath_shortcalinterval*hz/1000; if (sc->sc_opmode != HAL_M_HOSTAP) nextcal *= 10; sc->sc_doresetcal = AH_TRUE; } else { /* nextcal should be the shortest time for next event */ nextcal = ath_longcalinterval*hz; if (sc->sc_lastcalreset == 0) sc->sc_lastcalreset = sc->sc_lastlongcal; else if (ticks - sc->sc_lastcalreset >= ath_resetcalinterval*hz) sc->sc_resetcal = 1; /* setup reset next trip */ sc->sc_doresetcal = AH_FALSE; } /* ANI calibration may occur more often than short/long/resetcal */ if (ath_anicalinterval > 0) nextcal = MIN(nextcal, ath_anicalinterval*hz/1000); if (nextcal != 0) { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: next +%u (%sisCalDone)\n", __func__, nextcal, isCalDone ? "" : "!"); callout_reset(&sc->sc_cal_ch, nextcal, ath_calibrate, sc); } else { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: calibration disabled\n", __func__); /* NB: don't rearm timer */ } /* * Restore power state now that we're done. */ ath_power_restore_power_state(sc); } static void ath_scan_start(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; /* XXX calibration timer? */ /* XXXGL: is constant ieee80211broadcastaddr a correct choice? */ ATH_LOCK(sc); sc->sc_scanning = 1; sc->sc_syncbeacon = 0; rfilt = ath_calcrxfilter(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); ath_hal_setrxfilter(ah, rfilt); ath_hal_setassocid(ah, ieee80211broadcastaddr, 0); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0\n", __func__, rfilt, ether_sprintf(ieee80211broadcastaddr)); } static void ath_scan_end(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; ATH_LOCK(sc); sc->sc_scanning = 0; rfilt = ath_calcrxfilter(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); ath_hal_setrxfilter(ah, rfilt); ath_hal_setassocid(ah, sc->sc_curbssid, sc->sc_curaid); ath_hal_process_noisefloor(ah); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0x%x\n", __func__, rfilt, ether_sprintf(sc->sc_curbssid), sc->sc_curaid); } #ifdef ATH_ENABLE_11N /* * For now, just do a channel change. * * Later, we'll go through the hard slog of suspending tx/rx, changing rate * control state and resetting the hardware without dropping frames out * of the queue. * * The unfortunate trouble here is making absolutely sure that the * channel width change has propagated enough so the hardware * absolutely isn't handed bogus frames for it's current operating * mode. (Eg, 40MHz frames in 20MHz mode.) Since TX and RX can and * does occur in parallel, we need to make certain we've blocked * any further ongoing TX (and RX, that can cause raw TX) * before we do this. */ static void ath_update_chw(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_STATE, "%s: called\n", __func__); ath_set_channel(ic); } #endif /* ATH_ENABLE_11N */ static void ath_set_channel(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); (void) ath_chan_set(sc, ic->ic_curchan); /* * If we are returning to our bss channel then mark state * so the next recv'd beacon's tsf will be used to sync the * beacon timers. Note that since we only hear beacons in * sta/ibss mode this has no effect in other operating modes. 
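 * (Illustrative, not from this diff: the RX path consumes the flag roughly as if (sc->sc_syncbeacon) { ath_beacon_config(sc, vap); sc->sc_syncbeacon = 0; } when the next beacon from our BSS is heard.)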
*/ ATH_LOCK(sc); if (!sc->sc_scanning && ic->ic_curchan == ic->ic_bsschan) sc->sc_syncbeacon = 1; ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } /* * Walk the vap list and check if there are any vaps in RUN state. */ static int ath_isanyrunningvaps(struct ieee80211vap *this) { struct ieee80211com *ic = this->iv_ic; struct ieee80211vap *vap; IEEE80211_LOCK_ASSERT(ic); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { if (vap != this && vap->iv_state >= IEEE80211_S_RUN) return 1; } return 0; } static int ath_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp = ATH_VAP(vap); struct ath_hal *ah = sc->sc_ah; struct ieee80211_node *ni = NULL; int i, error, stamode; u_int32_t rfilt; int csa_run_transition = 0; enum ieee80211_state ostate = vap->iv_state; static const HAL_LED_STATE leds[] = { HAL_LED_INIT, /* IEEE80211_S_INIT */ HAL_LED_SCAN, /* IEEE80211_S_SCAN */ HAL_LED_AUTH, /* IEEE80211_S_AUTH */ HAL_LED_ASSOC, /* IEEE80211_S_ASSOC */ HAL_LED_RUN, /* IEEE80211_S_CAC */ HAL_LED_RUN, /* IEEE80211_S_RUN */ HAL_LED_RUN, /* IEEE80211_S_CSA */ HAL_LED_RUN, /* IEEE80211_S_SLEEP */ }; DPRINTF(sc, ATH_DEBUG_STATE, "%s: %s -> %s\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); /* * net80211 _should_ have the comlock asserted at this point. * There are some comments around the calls to vap->iv_newstate * which indicate that it (newstate) may end up dropping the * lock. This and the subsequent lock assert check after newstate * are an attempt to catch these and figure out how/why. */ IEEE80211_LOCK_ASSERT(ic); /* Before we touch the hardware - wake it up */ ATH_LOCK(sc); /* * If the NIC is in anything other than SLEEP state, * we need to ensure that self-generated frames are * set for PWRMGT=0. Otherwise we may end up with * strange situations. * * XXX TODO: is this actually the case? :-) */ if (nstate != IEEE80211_S_SLEEP) ath_power_setselfgen(sc, HAL_PM_AWAKE); /* * Now, wake the thing up. */ ath_power_set_power_state(sc, HAL_PM_AWAKE); /* * And stop the calibration callout whilst we have * ATH_LOCK held. */ callout_stop(&sc->sc_cal_ch); ATH_UNLOCK(sc); if (ostate == IEEE80211_S_CSA && nstate == IEEE80211_S_RUN) csa_run_transition = 1; ath_hal_setledstate(ah, leds[nstate]); /* set LED */ if (nstate == IEEE80211_S_SCAN) { /* * Scanning: turn off beacon miss and don't beacon. * Mark beacon state so when we reach RUN state we'll * [re]setup beacons. Unblock the task q thread so * deferred interrupt processing is done. */ /* Ensure we stay awake during scan */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); ATH_UNLOCK(sc); ath_hal_intrset(ah, sc->sc_imask &~ (HAL_INT_SWBA | HAL_INT_BMISS)); sc->sc_imask &= ~(HAL_INT_SWBA | HAL_INT_BMISS); sc->sc_beacons = 0; taskqueue_unblock(sc->sc_tq); } ni = ieee80211_ref_node(vap->iv_bss); rfilt = ath_calcrxfilter(sc); stamode = (vap->iv_opmode == IEEE80211_M_STA || vap->iv_opmode == IEEE80211_M_AHDEMO || vap->iv_opmode == IEEE80211_M_IBSS); /* * XXX Don't need to do this (and others) if we've transitioned * from SLEEP->RUN.
*/ if (stamode && nstate == IEEE80211_S_RUN) { sc->sc_curaid = ni->ni_associd; IEEE80211_ADDR_COPY(sc->sc_curbssid, ni->ni_bssid); ath_hal_setassocid(ah, sc->sc_curbssid, sc->sc_curaid); } DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0x%x\n", __func__, rfilt, ether_sprintf(sc->sc_curbssid), sc->sc_curaid); ath_hal_setrxfilter(ah, rfilt); /* XXX is this to restore keycache on resume? */ if (vap->iv_opmode != IEEE80211_M_STA && (vap->iv_flags & IEEE80211_F_PRIVACY)) { for (i = 0; i < IEEE80211_WEP_NKID; i++) if (ath_hal_keyisvalid(ah, i)) ath_hal_keysetmac(ah, i, ni->ni_bssid); } /* * Invoke the parent method to do net80211 work. */ error = avp->av_newstate(vap, nstate, arg); if (error != 0) goto bad; /* * See above: ensure av_newstate() doesn't drop the lock * on us. */ IEEE80211_LOCK_ASSERT(ic); if (nstate == IEEE80211_S_RUN) { /* NB: collect bss node again, it may have changed */ ieee80211_free_node(ni); ni = ieee80211_ref_node(vap->iv_bss); DPRINTF(sc, ATH_DEBUG_STATE, "%s(RUN): iv_flags 0x%08x bintvl %d bssid %s " "capinfo 0x%04x chan %d\n", __func__, vap->iv_flags, ni->ni_intval, ether_sprintf(ni->ni_bssid), ni->ni_capinfo, ieee80211_chan2ieee(ic, ic->ic_curchan)); switch (vap->iv_opmode) { #ifdef IEEE80211_SUPPORT_TDMA case IEEE80211_M_AHDEMO: if ((vap->iv_caps & IEEE80211_C_TDMA) == 0) break; /* fall thru... */ #endif case IEEE80211_M_HOSTAP: case IEEE80211_M_IBSS: case IEEE80211_M_MBSS: /* * Allocate and setup the beacon frame. * * Stop any previous beacon DMA. This may be * necessary, for example, when an ibss merge * causes reconfiguration; there will be a state * transition from RUN->RUN that means we may * be called with beacon transmission active. */ ath_hal_stoptxdma(ah, sc->sc_bhalq); error = ath_beacon_alloc(sc, ni); if (error != 0) goto bad; /* * If joining an adhoc network defer beacon timer * configuration to the next beacon frame so we * have a current TSF to use. Otherwise we're * starting an ibss/bss so there's no need to delay; * if this is the first vap moving to RUN state, then * beacon state needs to be [re]configured. */ if (vap->iv_opmode == IEEE80211_M_IBSS && ni->ni_tstamp.tsf != 0) { sc->sc_syncbeacon = 1; } else if (!sc->sc_beacons) { #ifdef IEEE80211_SUPPORT_TDMA if (vap->iv_caps & IEEE80211_C_TDMA) ath_tdma_config(sc, vap); else #endif ath_beacon_config(sc, vap); sc->sc_beacons = 1; } break; case IEEE80211_M_STA: /* * Defer beacon timer configuration to the next * beacon frame so we have a current TSF to use * (any TSF collected when scanning is likely old). * However if it's due to a CSA -> RUN transition, * force a beacon update so we pick up a lack of * beacons from an AP in CAC and thus force a * scan. * * And, there's also corner cases here where * after a scan, the AP may have disappeared. * In that case, we may not receive an actual * beacon to update the beacon timer and thus we * won't get notified of the missing beacons. */ if (ostate != IEEE80211_S_RUN && ostate != IEEE80211_S_SLEEP) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: STA; syncbeacon=1\n", __func__); sc->sc_syncbeacon = 1; if (csa_run_transition) ath_beacon_config(sc, vap); /* * PR: kern/175227 * * Reconfigure beacons during reset; as otherwise * we won't get the beacon timers reprogrammed * after a reset and thus we won't pick up a * beacon miss interrupt. * * Hopefully we'll see a beacon before the BMISS * timer fires (too often), leading to a STA * disassociation. 
*/ sc->sc_beacons = 1; } break; case IEEE80211_M_MONITOR: /* * Monitor mode vaps have only INIT->RUN and RUN->RUN * transitions so we must re-enable interrupts here to * handle the case of a single monitor mode vap. */ ath_hal_intrset(ah, sc->sc_imask); break; case IEEE80211_M_WDS: break; default: break; } /* * Let the hal process statistics collected during a * scan so it can provide calibrated noise floor data. */ ath_hal_process_noisefloor(ah); /* * Reset rssi stats; maybe not the best place... */ sc->sc_halstats.ns_avgbrssi = ATH_RSSI_DUMMY_MARKER; sc->sc_halstats.ns_avgrssi = ATH_RSSI_DUMMY_MARKER; sc->sc_halstats.ns_avgtxrssi = ATH_RSSI_DUMMY_MARKER; /* * Force awake for RUN mode. */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); /* * Finally, start any timers and the task q thread * (in case we didn't go through SCAN state). */ if (ath_longcalinterval != 0) { /* start periodic recalibration timer */ callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc); } else { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: calibration disabled\n", __func__); } ATH_UNLOCK(sc); taskqueue_unblock(sc->sc_tq); } else if (nstate == IEEE80211_S_INIT) { /* * If there are no vaps left in RUN state then * shutdown host/driver operation: * o disable interrupts * o disable the task queue thread * o mark beacon processing as stopped */ if (!ath_isanyrunningvaps(vap)) { sc->sc_imask &= ~(HAL_INT_SWBA | HAL_INT_BMISS); /* disable interrupts */ ath_hal_intrset(ah, sc->sc_imask &~ HAL_INT_GLOBAL); taskqueue_block(sc->sc_tq); sc->sc_beacons = 0; } #ifdef IEEE80211_SUPPORT_TDMA ath_hal_setcca(ah, AH_TRUE); #endif } else if (nstate == IEEE80211_S_SLEEP) { /* We're going to sleep, so transition appropriately */ /* For now, only do this if we're a single STA vap */ if (sc->sc_nvaps == 1 && vap->iv_opmode == IEEE80211_M_STA) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: syncbeacon=%d\n", __func__, sc->sc_syncbeacon); ATH_LOCK(sc); /* * Always at least set the self-generated * frame config to set PWRMGT=1. */ ath_power_setselfgen(sc, HAL_PM_NETWORK_SLEEP); /* * If we're not syncing beacons, transition * to NETWORK_SLEEP. * * We stay awake if syncbeacon > 0 in case * we need to listen for some beacons otherwise * our beacon timer config may be wrong. */ if (sc->sc_syncbeacon == 0) { - ath_power_setpower(sc, HAL_PM_NETWORK_SLEEP); + ath_power_setpower(sc, HAL_PM_NETWORK_SLEEP, 1); } ATH_UNLOCK(sc); } } bad: ieee80211_free_node(ni); /* * Restore the power state - either to what it was, or * to network_sleep if it's alright. */ ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return error; } /* * Allocate a key cache slot to the station so we can * setup a mapping from key index to node. The key cache * slot is needed for managing antenna state and for * compression when stations do not use crypto. We do * it uniliaterally here; if crypto is employed this slot * will be reassigned. */ static void ath_setup_stationkey(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ath_softc *sc = vap->iv_ic->ic_softc; ieee80211_keyix keyix, rxkeyix; /* XXX should take a locked ref to vap->iv_bss */ if (!ath_key_alloc(vap, &ni->ni_ucastkey, &keyix, &rxkeyix)) { /* * Key cache is full; we'll fall back to doing * the more expensive lookup in software. Note * this also means no h/w compression. */ /* XXX msg+statistic */ } else { /* XXX locking? 
*/ ni->ni_ucastkey.wk_keyix = keyix; ni->ni_ucastkey.wk_rxkeyix = rxkeyix; /* NB: must mark device key to get called back on delete */ ni->ni_ucastkey.wk_flags |= IEEE80211_KEY_DEVKEY; IEEE80211_ADDR_COPY(ni->ni_ucastkey.wk_macaddr, ni->ni_macaddr); /* NB: this will create a pass-thru key entry */ ath_keyset(sc, vap, &ni->ni_ucastkey, vap->iv_bss); } } /* * Setup driver-specific state for a newly associated node. * Note that we're called also on a re-associate, the isnew * param tells us if this is the first time or not. */ static void ath_newassoc(struct ieee80211_node *ni, int isnew) { struct ath_node *an = ATH_NODE(ni); struct ieee80211vap *vap = ni->ni_vap; struct ath_softc *sc = vap->iv_ic->ic_softc; const struct ieee80211_txparam *tp = ni->ni_txparms; an->an_mcastrix = ath_tx_findrix(sc, tp->mcastrate); an->an_mgmtrix = ath_tx_findrix(sc, tp->mgmtrate); DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: reassoc; isnew=%d, is_powersave=%d\n", __func__, ni->ni_macaddr, ":", isnew, an->an_is_powersave); ATH_NODE_LOCK(an); ath_rate_newassoc(sc, an, isnew); ATH_NODE_UNLOCK(an); if (isnew && (vap->iv_flags & IEEE80211_F_PRIVACY) == 0 && sc->sc_hasclrkey && ni->ni_ucastkey.wk_keyix == IEEE80211_KEYIX_NONE) ath_setup_stationkey(ni); /* * If we're reassociating, make sure that any paused queues * get unpaused. * * Now, we may have frames in the hardware queue for this node. * So if we are reassociating and there are frames in the queue, * we need to go through the cleanup path to ensure that they're * marked as non-aggregate. */ if (! isnew) { DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: reassoc; is_powersave=%d\n", __func__, ni->ni_macaddr, ":", an->an_is_powersave); /* XXX for now, we can't hold the lock across assoc */ ath_tx_node_reassoc(sc, an); /* XXX for now, we can't hold the lock across wakeup */ if (an->an_is_powersave) ath_tx_node_wakeup(sc, an); } } static int ath_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *reg, int nchans, struct ieee80211_channel chans[]) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: rd %u cc %u location %c%s\n", __func__, reg->regdomain, reg->country, reg->location, reg->ecm ? " ecm" : ""); status = ath_hal_set_channels(ah, chans, nchans, reg->country, reg->regdomain); if (status != HAL_OK) { DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: failed, status %u\n", __func__, status); return EINVAL; /* XXX */ } return 0; } static void ath_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: use rd %u cc %d\n", __func__, SKU_DEBUG, CTRY_DEFAULT); /* XXX check return */ (void) ath_hal_getchannels(ah, chans, maxchans, nchans, HAL_MODE_ALL, CTRY_DEFAULT, SKU_DEBUG, AH_TRUE); } static int ath_getchannels(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; /* * Collect channel set based on EEPROM contents. 
*/ status = ath_hal_init_channels(ah, ic->ic_channels, IEEE80211_CHAN_MAX, &ic->ic_nchans, HAL_MODE_ALL, CTRY_DEFAULT, SKU_NONE, AH_TRUE); if (status != HAL_OK) { device_printf(sc->sc_dev, "%s: unable to collect channel list from hal, status %d\n", __func__, status); return EINVAL; } (void) ath_hal_getregdomain(ah, &sc->sc_eerd); ath_hal_getcountrycode(ah, &sc->sc_eecc); /* NB: cannot fail */ /* XXX map Atheros sku's to net80211 SKU's */ /* XXX net80211 types too small */ ic->ic_regdomain.regdomain = (uint16_t) sc->sc_eerd; ic->ic_regdomain.country = (uint16_t) sc->sc_eecc; ic->ic_regdomain.isocc[0] = ' '; /* XXX don't know */ ic->ic_regdomain.isocc[1] = ' '; ic->ic_regdomain.ecm = 1; ic->ic_regdomain.location = 'I'; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: eeprom rd %u cc %u (mapped rd %u cc %u) location %c%s\n", __func__, sc->sc_eerd, sc->sc_eecc, ic->ic_regdomain.regdomain, ic->ic_regdomain.country, ic->ic_regdomain.location, ic->ic_regdomain.ecm ? " ecm" : ""); return 0; } static int ath_rate_setup(struct ath_softc *sc, u_int mode) { struct ath_hal *ah = sc->sc_ah; const HAL_RATE_TABLE *rt; switch (mode) { case IEEE80211_MODE_11A: rt = ath_hal_getratetable(ah, HAL_MODE_11A); break; case IEEE80211_MODE_HALF: rt = ath_hal_getratetable(ah, HAL_MODE_11A_HALF_RATE); break; case IEEE80211_MODE_QUARTER: rt = ath_hal_getratetable(ah, HAL_MODE_11A_QUARTER_RATE); break; case IEEE80211_MODE_11B: rt = ath_hal_getratetable(ah, HAL_MODE_11B); break; case IEEE80211_MODE_11G: rt = ath_hal_getratetable(ah, HAL_MODE_11G); break; case IEEE80211_MODE_TURBO_A: rt = ath_hal_getratetable(ah, HAL_MODE_108A); break; case IEEE80211_MODE_TURBO_G: rt = ath_hal_getratetable(ah, HAL_MODE_108G); break; case IEEE80211_MODE_STURBO_A: rt = ath_hal_getratetable(ah, HAL_MODE_TURBO); break; case IEEE80211_MODE_11NA: rt = ath_hal_getratetable(ah, HAL_MODE_11NA_HT20); break; case IEEE80211_MODE_11NG: rt = ath_hal_getratetable(ah, HAL_MODE_11NG_HT20); break; default: DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid mode %u\n", __func__, mode); return 0; } sc->sc_rates[mode] = rt; return (rt != NULL); } static void ath_setcurmode(struct ath_softc *sc, enum ieee80211_phymode mode) { /* NB: on/off times from the Atheros NDIS driver, w/ permission */ static const struct { u_int rate; /* tx/rx 802.11 rate */ u_int16_t timeOn; /* LED on time (ms) */ u_int16_t timeOff; /* LED off time (ms) */ } blinkrates[] = { { 108, 40, 10 }, { 96, 44, 11 }, { 72, 50, 13 }, { 48, 57, 14 }, { 36, 67, 16 }, { 24, 80, 20 }, { 22, 100, 25 }, { 18, 133, 34 }, { 12, 160, 40 }, { 10, 200, 50 }, { 6, 240, 58 }, { 4, 267, 66 }, { 2, 400, 100 }, { 0, 500, 130 }, /* XXX half/quarter rates */ }; const HAL_RATE_TABLE *rt; int i, j; memset(sc->sc_rixmap, 0xff, sizeof(sc->sc_rixmap)); rt = sc->sc_rates[mode]; KASSERT(rt != NULL, ("no h/w rate set for phy mode %u", mode)); for (i = 0; i < rt->rateCount; i++) { uint8_t ieeerate = rt->info[i].dot11Rate & IEEE80211_RATE_VAL; if (rt->info[i].phy != IEEE80211_T_HT) sc->sc_rixmap[ieeerate] = i; else sc->sc_rixmap[ieeerate | IEEE80211_RATE_MCS] = i; } memset(sc->sc_hwmap, 0, sizeof(sc->sc_hwmap)); for (i = 0; i < nitems(sc->sc_hwmap); i++) { if (i >= rt->rateCount) { sc->sc_hwmap[i].ledon = (500 * hz) / 1000; sc->sc_hwmap[i].ledoff = (130 * hz) / 1000; continue; } sc->sc_hwmap[i].ieeerate = rt->info[i].dot11Rate & IEEE80211_RATE_VAL; if (rt->info[i].phy == IEEE80211_T_HT) sc->sc_hwmap[i].ieeerate |= IEEE80211_RATE_MCS; sc->sc_hwmap[i].txflags = IEEE80211_RADIOTAP_F_DATAPAD; if (rt->info[i].shortPreamble || rt->info[i].phy == 
IEEE80211_T_OFDM) sc->sc_hwmap[i].txflags |= IEEE80211_RADIOTAP_F_SHORTPRE; sc->sc_hwmap[i].rxflags = sc->sc_hwmap[i].txflags; for (j = 0; j < nitems(blinkrates)-1; j++) if (blinkrates[j].rate == sc->sc_hwmap[i].ieeerate) break; /* NB: this uses the last entry if the rate isn't found */ /* XXX beware of overlow */ sc->sc_hwmap[i].ledon = (blinkrates[j].timeOn * hz) / 1000; sc->sc_hwmap[i].ledoff = (blinkrates[j].timeOff * hz) / 1000; } sc->sc_currates = rt; sc->sc_curmode = mode; /* * All protection frames are transmitted at 2Mb/s for * 11g, otherwise at 1Mb/s. */ if (mode == IEEE80211_MODE_11G) sc->sc_protrix = ath_tx_findrix(sc, 2*2); else sc->sc_protrix = ath_tx_findrix(sc, 2*1); /* NB: caller is responsible for resetting rate control state */ } static void ath_watchdog(void *arg) { struct ath_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; int do_reset = 0; ATH_LOCK_ASSERT(sc); if (sc->sc_wd_timer != 0 && --sc->sc_wd_timer == 0) { uint32_t hangs; ath_power_set_power_state(sc, HAL_PM_AWAKE); if (ath_hal_gethangstate(sc->sc_ah, 0xffff, &hangs) && hangs != 0) { device_printf(sc->sc_dev, "%s hang detected (0x%x)\n", hangs & 0xff ? "bb" : "mac", hangs); } else device_printf(sc->sc_dev, "device timeout\n"); do_reset = 1; counter_u64_add(ic->ic_oerrors, 1); sc->sc_stats.ast_watchdog++; ath_power_restore_power_state(sc); } /* * We can't hold the lock across the ath_reset() call. * * And since this routine can't hold a lock and sleep, * do the reset deferred. */ if (do_reset) { taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask); } callout_schedule(&sc->sc_wd_ch, hz); } static void ath_parent(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; int error = EDOOFUS; ATH_LOCK(sc); if (ic->ic_nrunning > 0) { /* * To avoid rescanning another access point, * do not call ath_init() here. Instead, * only reflect promisc mode settings. */ if (sc->sc_running) { ath_power_set_power_state(sc, HAL_PM_AWAKE); ath_mode_init(sc); ath_power_restore_power_state(sc); } else if (!sc->sc_invalid) { /* * Beware of being called during attach/detach * to reset promiscuous mode. In that case we * will still be marked UP but not RUNNING. * However trying to re-init the interface * is the wrong thing to do as we've already * torn down much of our state. There's * probably a better way to deal with this. */ error = ath_init(sc); } } else { ath_stop(sc); if (!sc->sc_invalid) - ath_power_setpower(sc, HAL_PM_FULL_SLEEP); + ath_power_setpower(sc, HAL_PM_FULL_SLEEP, 1); } ATH_UNLOCK(sc); if (error == 0) { #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->start(sc->sc_tx99); else #endif ieee80211_start_all(ic); } } /* * Announce various information on device/driver attach. 
*/ static void ath_announce(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; device_printf(sc->sc_dev, "%s mac %d.%d RF%s phy %d.%d\n", ath_hal_mac_name(ah), ah->ah_macVersion, ah->ah_macRev, ath_hal_rf_name(ah), ah->ah_phyRev >> 4, ah->ah_phyRev & 0xf); device_printf(sc->sc_dev, "2GHz radio: 0x%.4x; 5GHz radio: 0x%.4x\n", ah->ah_analog2GhzRev, ah->ah_analog5GhzRev); if (bootverbose) { int i; for (i = 0; i <= WME_AC_VO; i++) { struct ath_txq *txq = sc->sc_ac2q[i]; device_printf(sc->sc_dev, "Use hw queue %u for %s traffic\n", txq->axq_qnum, ieee80211_wme_acnames[i]); } device_printf(sc->sc_dev, "Use hw queue %u for CAB traffic\n", sc->sc_cabq->axq_qnum); device_printf(sc->sc_dev, "Use hw queue %u for beacons\n", sc->sc_bhalq); } if (ath_rxbuf != ATH_RXBUF) device_printf(sc->sc_dev, "using %u rx buffers\n", ath_rxbuf); if (ath_txbuf != ATH_TXBUF) device_printf(sc->sc_dev, "using %u tx buffers\n", ath_txbuf); if (sc->sc_mcastkey && bootverbose) device_printf(sc->sc_dev, "using multicast key search\n"); } static void ath_dfs_tasklet(void *p, int npending) { struct ath_softc *sc = (struct ath_softc *) p; struct ieee80211com *ic = &sc->sc_ic; /* * If previous processing has found a radar event, * signal this to the net80211 layer to begin DFS * processing. */ if (ath_dfs_process_radar_event(sc, sc->sc_curchan)) { /* DFS event found, initiate channel change */ /* * XXX doesn't currently tell us whether the event * XXX was found in the primary or extension * XXX channel! */ IEEE80211_LOCK(ic); ieee80211_dfs_notify_radar(ic, sc->sc_curchan); IEEE80211_UNLOCK(ic); } } /* * Enable/disable power save. This must be called with * no TX driver locks currently held, so it should only * be called from the RX path (which doesn't hold any * TX driver locks.) */ static void ath_node_powersave(struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ath_node *an = ATH_NODE(ni); struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* XXX and no TXQ locks should be held here */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d\n", __func__, ni->ni_macaddr, ":", !! enable); /* Suspend or resume software queue handling */ if (enable) ath_tx_node_sleep(sc, an); else ath_tx_node_wakeup(sc, an); /* Update net80211 state */ avp->av_node_ps(ni, enable); #else struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* Update net80211 state */ avp->av_node_ps(ni, enable); #endif/* ATH_SW_PSQ */ } /* * Notification from net80211 that the powersave queue state has * changed. * * Since the software queue also may have some frames: * * + if the node software queue has frames and the TID state * is 0, we set the TIM; * + if the node and the stack are both empty, we clear the TIM bit. * + If the stack tries to set the bit, always set it. * + If the stack tries to clear the bit, only clear it if the * software queue in question is also cleared. * * TODO: this is called during node teardown; so let's ensure this * is all correctly handled and that the TIM bit is cleared. * It may be that the node flush is called _AFTER_ the net80211 * stack clears the TIM. * * Here is the racy part. Since it's possible >1 concurrent, * overlapping TXes will appear complete with a TX completion in * another thread, it's possible that the concurrent TIM calls will * clash. We can't hold the node lock here because setting the * TIM grabs the net80211 comlock and this may cause a LOR. 
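 * (Editorial illustration of the race: thread A completes the node's last queued frame and goes to clear the TIM while thread B queues a new frame and goes to set it; if B's av_set_tim() runs before A's, the bit is left clear with traffic still pending.)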
* The solution is either to totally serialise _everything_ at * this point (ie, all TX, completion and any reset/flush go into * one taskqueue) or a new "ath TIM lock" needs to be created that * just wraps the driver state change and this call to avp->av_set_tim(). * * The same race exists in the net80211 power save queue handling * as well. Since multiple transmitting threads may queue frames * into the driver, as well as ps-poll and the driver transmitting * frames (and thus clearing the psq), it's quite possible that * a packet entering the PSQ and a ps-poll being handled will * race, causing the TIM to be cleared and not re-set. */ static int ath_node_set_tim(struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_node *an = ATH_NODE(ni); struct ath_vap *avp = ATH_VAP(ni->ni_vap); int changed = 0; ATH_TX_LOCK(sc); an->an_stack_psq = enable; /* * This will get called for all operating modes, * even if avp->av_set_tim is unset. * It's currently set for hostap/ibss modes; but * the same infrastructure is used for both STA * and AP/IBSS node power save. */ if (avp->av_set_tim == NULL) { ATH_TX_UNLOCK(sc); return (0); } /* * If setting the bit, always set it here. * If clearing the bit, only clear it if the * software queue is also empty. * * If the node has left power save, just clear the TIM * bit regardless of the state of the power save queue. * * XXX TODO: although atomics are used, it's quite possible * that a race will occur between this and setting/clearing * in another thread. TX completion will occur always in * one thread, however setting/clearing the TIM bit can come * from a variety of different process contexts! */ if (enable && an->an_tim_set == 1) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, tim_set=1, ignoring\n", __func__, ni->ni_macaddr, ":", enable); ATH_TX_UNLOCK(sc); } else if (enable) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, enabling TIM\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 1; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else if (an->an_swq_depth == 0) { /* disable */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_swq_depth == 0, disabling\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 0; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else if (! an->an_is_powersave) { /* * disable regardless; the node isn't in powersave now */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_pwrsave=0, disabling\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 0; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else { /* * psq disable, node is currently in powersave, node * software queue isn't empty, so don't clear the TIM bit * for now. */ ATH_TX_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_swq_depth > 0, ignoring\n", __func__, ni->ni_macaddr, ":", enable); changed = 0; } return (changed); #else struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* * Some operating modes don't set av_set_tim(), so don't * update it here. */ if (avp->av_set_tim == NULL) return (0); return (avp->av_set_tim(ni, enable)); #endif /* ATH_SW_PSQ */ } /* * Set or update the TIM from the software queue. * * Check the software queue depth before attempting to do lock * anything; that avoids trying to obtain the lock. Then, * re-check afterwards to ensure nothing has changed in the * meantime. 
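 * (Illustrative shape of that pattern: do a cheap unlocked peek — if (an->an_swq_depth != 0) return; — then, with the TX lock held, re-test an_swq_depth == 0 before calling av_set_tim(ni, 0); the real logic follows below.)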
* * set: This is designed to be called from the TX path, after * a frame has been queued; to see if the swq > 0. * * clear: This is designed to be called from the buffer completion point * (right now it's ath_tx_default_comp()) where the state of * a software queue has changed. * * It makes sense to place it at buffer free / completion rather * than after each software queue operation, as there's no real * point in churning the TIM bit as the last frames in the software * queue are transmitted. If they fail and we retry them, we'd * just be setting the TIM bit again anyway. */ void ath_tx_update_tim(struct ath_softc *sc, struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ath_node *an; struct ath_vap *avp; /* Don't do this for broadcast/etc frames */ if (ni == NULL) return; an = ATH_NODE(ni); avp = ATH_VAP(ni->ni_vap); /* * And for operating modes without the TIM handler set, let's * just skip those. */ if (avp->av_set_tim == NULL) return; ATH_TX_LOCK_ASSERT(sc); if (enable) { if (an->an_is_powersave && an->an_tim_set == 0 && an->an_swq_depth != 0) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: swq_depth>0, tim_set=0, set!\n", __func__, ni->ni_macaddr, ":"); an->an_tim_set = 1; (void) avp->av_set_tim(ni, 1); } } else { /* * Don't bother grabbing the lock unless the queue is empty. */ if (an->an_swq_depth != 0) return; if (an->an_is_powersave && an->an_stack_psq == 0 && an->an_tim_set == 1 && an->an_swq_depth == 0) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: swq_depth=0, tim_set=1, psq_set=0," " clear!\n", __func__, ni->ni_macaddr, ":"); an->an_tim_set = 0; (void) avp->av_set_tim(ni, 0); } } #else return; #endif /* ATH_SW_PSQ */ } /* * Received a ps-poll frame from net80211. * * Here we get a chance to serve out a software-queued frame ourselves * before we punt it to net80211 to transmit us one itself - either * because there's traffic in the net80211 psq, or a NULL frame to * indicate there's nothing else. */ static void ath_node_recv_pspoll(struct ieee80211_node *ni, struct mbuf *m) { #ifdef ATH_SW_PSQ struct ath_node *an; struct ath_vap *avp; struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; int tid; /* Just paranoia */ if (ni == NULL) return; /* * Unassociated (temporary node) station. */ if (ni->ni_associd == 0) return; /* * We do have an active node, so let's begin looking into it. */ an = ATH_NODE(ni); avp = ATH_VAP(ni->ni_vap); /* * For now, we just call the original ps-poll method. * Once we're ready to flip this on: * * + Set leak to 1, as no matter what we're going to have * to send a frame; * + Check the software queue and if there's something in it, * schedule the highest TID that has traffic from this node. * Then make sure we schedule the software scheduler to * run so it picks up said frame. * * That way whatever happens, we'll at least send _a_ frame * to the given node. * * Again, yes, it's crappy QoS if the node has multiple * TIDs worth of traffic - but let's get it working first * before we optimise it. * * Also yes, there's definitely latency here - we're not * direct dispatching to the hardware in this path (and * we're likely being called from the packet receive path, * so going back into TX may be a little hairy!) but again * I'd like to get this working first before optimising * turn-around time. */ ATH_TX_LOCK(sc); /* * Legacy - we're called and the node isn't asleep. * Immediately punt. */ if (!
an->an_is_powersave) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: not in powersave?\n", __func__, ni->ni_macaddr, ":"); ATH_TX_UNLOCK(sc); avp->av_recv_pspoll(ni, m); return; } /* * We're in powersave. * * Leak a frame. */ an->an_leak_count = 1; /* * Now, if there's no frames in the node, just punt to * recv_pspoll. * * Don't bother checking if the TIM bit is set, we really * only care if there are any frames here! */ if (an->an_swq_depth == 0) { ATH_TX_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: SWQ empty; punting to net80211\n", __func__, ni->ni_macaddr, ":"); avp->av_recv_pspoll(ni, m); return; } /* * Ok, let's schedule the highest TID that has traffic * and then schedule something. */ for (tid = IEEE80211_TID_SIZE - 1; tid >= 0; tid--) { struct ath_tid *atid = &an->an_tid[tid]; /* * No frames? Skip. */ if (atid->axq_depth == 0) continue; ath_tx_tid_sched(sc, atid); /* * XXX we could do a direct call to the TXQ * scheduler code here to optimise latency * at the expense of a REALLY deep callstack. */ ATH_TX_UNLOCK(sc); taskqueue_enqueue(sc->sc_tq, &sc->sc_txqtask); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: leaking frame to TID %d\n", __func__, ni->ni_macaddr, ":", tid); return; } ATH_TX_UNLOCK(sc); /* * XXX nothing in the TIDs at this point? Eek. */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: TIDs empty, but ath_node showed traffic?!\n", __func__, ni->ni_macaddr, ":"); avp->av_recv_pspoll(ni, m); #else avp->av_recv_pspoll(ni, m); #endif /* ATH_SW_PSQ */ } MODULE_VERSION(if_ath, 1); MODULE_DEPEND(if_ath, wlan, 1, 1, 1); /* 802.11 media layer */ #if defined(IEEE80211_ALQ) || defined(AH_DEBUG_ALQ) || defined(ATH_DEBUG_ALQ) MODULE_DEPEND(if_ath, alq, 1, 1, 1); #endif Index: projects/clang391-import/sys/dev/ath/if_ath_beacon.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_beacon.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_beacon.c (revision 309263) @@ -1,1188 +1,1205 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. 
*/ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #include #ifdef INET #include #include #endif #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif /* * Setup a h/w transmit queue for beacons. */ int ath_beaconq_setup(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; HAL_TXQ_INFO qi; memset(&qi, 0, sizeof(qi)); qi.tqi_aifs = HAL_TXQ_USEDEFAULT; qi.tqi_cwmin = HAL_TXQ_USEDEFAULT; qi.tqi_cwmax = HAL_TXQ_USEDEFAULT; /* NB: for dynamic turbo, don't enable any other interrupts */ qi.tqi_qflags = HAL_TXQ_TXDESCINT_ENABLE; if (sc->sc_isedma) qi.tqi_qflags |= HAL_TXQ_TXOKINT_ENABLE | HAL_TXQ_TXERRINT_ENABLE; return ath_hal_setuptxqueue(ah, HAL_TX_QUEUE_BEACON, &qi); } /* * Setup the transmit queue parameters for the beacon queue. */ int ath_beaconq_config(struct ath_softc *sc) { #define ATH_EXPONENT_TO_VALUE(v) ((1<<(v))-1) struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_TXQ_INFO qi; ath_hal_gettxqueueprops(ah, sc->sc_bhalq, &qi); if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) { /* * Always burst out beacon and CAB traffic. */ qi.tqi_aifs = ATH_BEACON_AIFS_DEFAULT; qi.tqi_cwmin = ATH_BEACON_CWMIN_DEFAULT; qi.tqi_cwmax = ATH_BEACON_CWMAX_DEFAULT; } else { struct wmeParams *wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[WME_AC_BE]; /* * Adhoc mode; important thing is to use 2x cwmin. */ qi.tqi_aifs = wmep->wmep_aifsn; qi.tqi_cwmin = 2*ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); qi.tqi_cwmax = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); } if (!ath_hal_settxqueueprops(ah, sc->sc_bhalq, &qi)) { device_printf(sc->sc_dev, "unable to update parameters for " "beacon hardware queue!\n"); return 0; } else { ath_hal_resettxqueue(ah, sc->sc_bhalq); /* push to h/w */ return 1; } #undef ATH_EXPONENT_TO_VALUE } /* * Allocate and setup an initial beacon frame. */ int ath_beacon_alloc(struct ath_softc *sc, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ath_vap *avp = ATH_VAP(vap); struct ath_buf *bf; struct mbuf *m; int error; bf = avp->av_bcbuf; DPRINTF(sc, ATH_DEBUG_NODE, "%s: bf_m=%p, bf_node=%p\n", __func__, bf->bf_m, bf->bf_node); if (bf->bf_m != NULL) { bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); m_freem(bf->bf_m); bf->bf_m = NULL; } if (bf->bf_node != NULL) { ieee80211_free_node(bf->bf_node); bf->bf_node = NULL; } /* * NB: the beacon data buffer must be 32-bit aligned; * we assume the mbuf routines will return us something * with this alignment (perhaps should assert). 
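 * (Editorial sketch of such an assert, not present in the code: KASSERT((mtod(m, uintptr_t) & 3) == 0, ("beacon frame not 32-bit aligned"));)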
*/ m = ieee80211_beacon_alloc(ni); if (m == NULL) { device_printf(sc->sc_dev, "%s: cannot get mbuf\n", __func__); sc->sc_stats.ast_be_nombuf++; return ENOMEM; } error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "%s: cannot map mbuf, bus_dmamap_load_mbuf_sg returns %d\n", __func__, error); m_freem(m); return error; } /* * Calculate a TSF adjustment factor required for staggered * beacons. Note that we assume the format of the beacon * frame leaves the tstamp field immediately following the * header. */ if (sc->sc_stagbeacons && avp->av_bslot > 0) { uint64_t tsfadjust; struct ieee80211_frame *wh; /* * The beacon interval is in TU's; the TSF is in usecs. * We figure out how many TU's to add to align the timestamp * then convert to TSF units and handle byte swapping before * inserting it in the frame. The hardware will then add this * each time a beacon frame is sent. Note that we align vaps * 1..N and leave vap 0 untouched. This means vap 0 has a * timestamp in one beacon interval while the others get a * timestamp aligned to the next interval. */ tsfadjust = ni->ni_intval * (ATH_BCBUF - avp->av_bslot) / ATH_BCBUF; tsfadjust = htole64(tsfadjust << 10); /* TU -> TSF */ DPRINTF(sc, ATH_DEBUG_BEACON, "%s: %s beacons bslot %d intval %u tsfadjust %llu\n", __func__, sc->sc_stagbeacons ? "stagger" : "burst", avp->av_bslot, ni->ni_intval, (long long unsigned) le64toh(tsfadjust)); wh = mtod(m, struct ieee80211_frame *); memcpy(&wh[1], &tsfadjust, sizeof(tsfadjust)); } bf->bf_m = m; bf->bf_node = ieee80211_ref_node(ni); return 0; } /* * Setup the beacon frame for transmit. */ static void ath_beacon_setup(struct ath_softc *sc, struct ath_buf *bf) { #define USE_SHPREAMBLE(_ic) \ (((_ic)->ic_flags & (IEEE80211_F_SHPREAMBLE | IEEE80211_F_USEBARKER))\ == IEEE80211_F_SHPREAMBLE) struct ieee80211_node *ni = bf->bf_node; struct ieee80211com *ic = ni->ni_ic; struct mbuf *m = bf->bf_m; struct ath_hal *ah = sc->sc_ah; struct ath_desc *ds; int flags, antenna; const HAL_RATE_TABLE *rt; u_int8_t rix, rate; HAL_DMA_ADDR bufAddrList[4]; uint32_t segLenList[4]; HAL_11N_RATE_SERIES rc[4]; DPRINTF(sc, ATH_DEBUG_BEACON_PROC, "%s: m %p len %u\n", __func__, m, m->m_len); /* setup descriptors */ ds = bf->bf_desc; bf->bf_last = bf; bf->bf_lastds = ds; flags = HAL_TXDESC_NOACK; if (ic->ic_opmode == IEEE80211_M_IBSS && sc->sc_hasveol) { /* self-linked descriptor */ ath_hal_settxdesclink(sc->sc_ah, ds, bf->bf_daddr); flags |= HAL_TXDESC_VEOL; /* * Let hardware handle antenna switching. */ antenna = sc->sc_txantenna; } else { ath_hal_settxdesclink(sc->sc_ah, ds, 0); /* * Switch antenna every 4 beacons. * XXX assumes two antennas */ if (sc->sc_txantenna != 0) antenna = sc->sc_txantenna; else if (sc->sc_stagbeacons && sc->sc_nbcnvaps != 0) antenna = ((sc->sc_stats.ast_be_xmit / sc->sc_nbcnvaps) & 4 ? 2 : 1); else antenna = (sc->sc_stats.ast_be_xmit & 4 ? 2 : 1); } KASSERT(bf->bf_nseg == 1, ("multi-segment beacon frame; nseg %u", bf->bf_nseg)); /* * Calculate rate code.
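 * (Editorial note: rix 0 selects the first, lowest entry of the current rate table — e.g. 1Mb/s CCK for 11b or 6Mb/s OFDM for 11a; the exact rate is table-dependent.)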
* XXX everything at min xmit rate */ rix = 0; rt = sc->sc_currates; rate = rt->info[rix].rateCode; if (USE_SHPREAMBLE(ic)) rate |= rt->info[rix].shortPreamble; ath_hal_setuptxdesc(ah, ds , m->m_len + IEEE80211_CRC_LEN /* frame length */ , sizeof(struct ieee80211_frame)/* header length */ , HAL_PKT_TYPE_BEACON /* Atheros packet type */ , ieee80211_get_node_txpower(ni) /* txpower XXX */ , rate, 1 /* series 0 rate/tries */ , HAL_TXKEYIX_INVALID /* no encryption */ , antenna /* antenna mode */ , flags /* no ack, veol for beacons */ , 0 /* rts/cts rate */ , 0 /* rts/cts duration */ ); /* * The EDMA HAL currently assumes that _all_ rate control * settings are done in ath_hal_set11nratescenario(), rather * than in ath_hal_setuptxdesc(). */ if (sc->sc_isedma) { memset(&rc, 0, sizeof(rc)); rc[0].ChSel = sc->sc_txchainmask; rc[0].Tries = 1; rc[0].Rate = rt->info[rix].rateCode; rc[0].RateIndex = rix; rc[0].tx_power_cap = 0x3f; rc[0].PktDuration = ath_hal_computetxtime(ah, rt, roundup(m->m_len, 4), rix, 0, AH_TRUE); ath_hal_set11nratescenario(ah, ds, 0, 0, rc, 4, flags); } /* NB: beacon's BufLen must be a multiple of 4 bytes */ segLenList[0] = roundup(m->m_len, 4); segLenList[1] = segLenList[2] = segLenList[3] = 0; bufAddrList[0] = bf->bf_segs[0].ds_addr; bufAddrList[1] = bufAddrList[2] = bufAddrList[3] = 0; ath_hal_filltxdesc(ah, ds , bufAddrList , segLenList , 0 /* XXX desc id */ , sc->sc_bhalq /* hardware TXQ */ , AH_TRUE /* first segment */ , AH_TRUE /* last segment */ , ds /* first descriptor */ ); #if 0 ath_desc_swap(ds); #endif #undef USE_SHPREAMBLE } void ath_beacon_update(struct ieee80211vap *vap, int item) { struct ieee80211_beacon_offsets *bo = &vap->iv_bcn_off; setbit(bo->bo_flags, item); } /* * Handle a beacon miss. */ void ath_beacon_miss(struct ath_softc *sc) { HAL_SURVEY_SAMPLE hs; HAL_BOOL ret; uint32_t hangs; bzero(&hs, sizeof(hs)); ret = ath_hal_get_mib_cycle_counts(sc->sc_ah, &hs); if (ath_hal_gethangstate(sc->sc_ah, 0xffff, &hangs) && hangs != 0) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: hang=0x%08x\n", __func__, hangs); } #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_MISSED_BEACON)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_MISSED_BEACON, 0, NULL); #endif DPRINTF(sc, ATH_DEBUG_BEACON, "%s: valid=%d, txbusy=%u, rxbusy=%u, chanbusy=%u, " "extchanbusy=%u, cyclecount=%u\n", __func__, ret, hs.tx_busy, hs.rx_busy, hs.chan_busy, hs.ext_chan_busy, hs.cycle_count); } /* * Transmit a beacon frame at SWBA. Dynamic updates to the * frame contents are done as needed and the slot time is * also adjusted based on current state. */ void ath_beacon_proc(void *arg, int pending) { struct ath_softc *sc = arg; struct ath_hal *ah = sc->sc_ah; struct ieee80211vap *vap; struct ath_buf *bf; int slot, otherant; uint32_t bfaddr; DPRINTF(sc, ATH_DEBUG_BEACON_PROC, "%s: pending %u\n", __func__, pending); /* * Check if the previous beacon has gone out. If * not don't try to post another, skip this period * and wait for the next. Missed beacons indicate * a problem and should not occur. If we miss too * many consecutive beacons reset the device. 
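 * (Concretely, in the code below: sc_bmisscount counts consecutive SWBA periods that still had TX pending, and once it reaches ath_bstuck_threshold the bstuck task is queued to reset the chip.)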
*/ if (ath_hal_numtxpending(ah, sc->sc_bhalq) != 0) { sc->sc_bmisscount++; sc->sc_stats.ast_be_missed++; ath_beacon_miss(sc); DPRINTF(sc, ATH_DEBUG_BEACON, "%s: missed %u consecutive beacons\n", __func__, sc->sc_bmisscount); if (sc->sc_bmisscount >= ath_bstuck_threshold) taskqueue_enqueue(sc->sc_tq, &sc->sc_bstucktask); return; } if (sc->sc_bmisscount != 0) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: resume beacon xmit after %u misses\n", __func__, sc->sc_bmisscount); sc->sc_bmisscount = 0; #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_RESUME_BEACON)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_RESUME_BEACON, 0, NULL); #endif } if (sc->sc_stagbeacons) { /* staggered beacons */ struct ieee80211com *ic = &sc->sc_ic; uint32_t tsftu; tsftu = ath_hal_gettsf32(ah) >> 10; /* XXX lintval */ slot = ((tsftu % ic->ic_lintval) * ATH_BCBUF) / ic->ic_lintval; vap = sc->sc_bslot[(slot+1) % ATH_BCBUF]; bfaddr = 0; if (vap != NULL && vap->iv_state >= IEEE80211_S_RUN) { bf = ath_beacon_generate(sc, vap); if (bf != NULL) bfaddr = bf->bf_daddr; } } else { /* burst'd beacons */ uint32_t *bflink = &bfaddr; for (slot = 0; slot < ATH_BCBUF; slot++) { vap = sc->sc_bslot[slot]; if (vap != NULL && vap->iv_state >= IEEE80211_S_RUN) { bf = ath_beacon_generate(sc, vap); /* * XXX TODO: this should use settxdesclinkptr() * otherwise it won't work for EDMA chipsets! */ if (bf != NULL) { /* XXX should do this using the ds */ *bflink = bf->bf_daddr; ath_hal_gettxdesclinkptr(sc->sc_ah, bf->bf_desc, &bflink); } } } /* * XXX TODO: this should use settxdesclinkptr() * otherwise it won't work for EDMA chipsets! */ *bflink = 0; /* terminate list */ } /* * Handle slot time change when a non-ERP station joins/leaves * an 11g network. The 802.11 layer notifies us via callback, * we mark updateslot, then wait one beacon before effecting * the change. This gives associated stations at least one * beacon interval to note the state change. */ /* XXX locking */ if (sc->sc_updateslot == UPDATE) { sc->sc_updateslot = COMMIT; /* commit next beacon */ sc->sc_slotupdate = slot; } else if (sc->sc_updateslot == COMMIT && sc->sc_slotupdate == slot) ath_setslottime(sc); /* commit change to h/w */ /* * Check recent per-antenna transmit statistics and flip * the default antenna if noticeably more frames went out * on the non-default antenna. * XXX assumes 2 antennae */ if (!sc->sc_diversity && (!sc->sc_stagbeacons || slot == 0)) { otherant = sc->sc_defant & 1 ? 2 : 1; if (sc->sc_ant_tx[otherant] > sc->sc_ant_tx[sc->sc_defant] + 2) ath_setdefantenna(sc, otherant); sc->sc_ant_tx[1] = sc->sc_ant_tx[2] = 0; } /* Program the CABQ with the contents of the CABQ txq and start it */ ATH_TXQ_LOCK(sc->sc_cabq); ath_beacon_cabq_start(sc); ATH_TXQ_UNLOCK(sc->sc_cabq); /* Program the new beacon frame if we have one for this interval */ if (bfaddr != 0) { /* * Stop any current dma and put the new frame on the queue. * This should never fail since we check above that no frames * are still pending on the queue. */ if (!
sc->sc_isedma) { if (!ath_hal_stoptxdma(ah, sc->sc_bhalq)) { DPRINTF(sc, ATH_DEBUG_ANY, "%s: beacon queue %u did not stop?\n", __func__, sc->sc_bhalq); } } /* NB: cabq traffic should already be queued and primed */ ath_hal_puttxbuf(ah, sc->sc_bhalq, bfaddr); ath_hal_txstart(ah, sc->sc_bhalq); sc->sc_stats.ast_be_xmit++; } } static void ath_beacon_cabq_start_edma(struct ath_softc *sc) { struct ath_buf *bf, *bf_last; struct ath_txq *cabq = sc->sc_cabq; #if 0 struct ath_buf *bfi; int i = 0; #endif ATH_TXQ_LOCK_ASSERT(cabq); if (TAILQ_EMPTY(&cabq->axq_q)) return; bf = TAILQ_FIRST(&cabq->axq_q); bf_last = TAILQ_LAST(&cabq->axq_q, axq_q_s); /* * This is a dirty, dirty hack to push the contents of * the cabq staging queue into the FIFO. * * This ideally should live in the EDMA code file * and only push things into the CABQ if there's a FIFO * slot. * * We can't treat this like a normal TX queue because * in the case of multi-VAP traffic, we may have to flush * the CABQ each new (staggered) beacon that goes out. * But for non-staggered beacons, we could in theory * handle multicast traffic for all VAPs in one FIFO * push. Just keep all of this in mind if you're wondering * how to correctly/better handle multi-VAP CABQ traffic * with EDMA. */ /* * Is the CABQ FIFO free? If not, complain loudly and * don't queue anything. Maybe we'll flush the CABQ * traffic, maybe we won't. But that'll happen next * beacon interval. */ if (cabq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) { device_printf(sc->sc_dev, "%s: Q%d: CAB FIFO queue=%d?\n", __func__, cabq->axq_qnum, cabq->axq_fifo_depth); return; } /* * Ok, so here's the gymnastics required to make this * all sensible. */ /* * Tag the first/last buffer appropriately. */ bf->bf_flags |= ATH_BUF_FIFOPTR; bf_last->bf_flags |= ATH_BUF_FIFOEND; #if 0 i = 0; TAILQ_FOREACH(bfi, &cabq->axq_q, bf_list) { ath_printtxbuf(sc, bf, cabq->axq_qnum, i, 0); i++; } #endif /* * We now need to push this set of frames onto the tail * of the FIFO queue. We don't adjust the aggregate * count, only the queue depth counter(s). * We also need to blank the link pointer now. */ TAILQ_CONCAT(&cabq->fifo.axq_q, &cabq->axq_q, bf_list); cabq->axq_link = NULL; cabq->fifo.axq_depth += cabq->axq_depth; cabq->axq_depth = 0; /* Bump FIFO queue */ cabq->axq_fifo_depth++; /* Push the first entry into the hardware */ ath_hal_puttxbuf(sc->sc_ah, cabq->axq_qnum, bf->bf_daddr); cabq->axq_flags |= ATH_TXQ_PUTRUNNING; /* NB: gated by beacon so safe to start here */ ath_hal_txstart(sc->sc_ah, cabq->axq_qnum); } static void ath_beacon_cabq_start_legacy(struct ath_softc *sc) { struct ath_buf *bf; struct ath_txq *cabq = sc->sc_cabq; ATH_TXQ_LOCK_ASSERT(cabq); if (TAILQ_EMPTY(&cabq->axq_q)) return; bf = TAILQ_FIRST(&cabq->axq_q); /* Push the first entry into the hardware */ ath_hal_puttxbuf(sc->sc_ah, cabq->axq_qnum, bf->bf_daddr); cabq->axq_flags |= ATH_TXQ_PUTRUNNING; /* NB: gated by beacon so safe to start here */ ath_hal_txstart(sc->sc_ah, cabq->axq_qnum); } /* * Start CABQ transmission - this assumes that all frames are prepped * and ready in the CABQ.
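 * (The EDMA and legacy starters above differ only in FIFO bookkeeping; both end by handing the head buffer to the hardware with ath_hal_puttxbuf() and kicking ath_hal_txstart() on the CAB queue.)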
*/ void ath_beacon_cabq_start(struct ath_softc *sc) { struct ath_txq *cabq = sc->sc_cabq; ATH_TXQ_LOCK_ASSERT(cabq); if (TAILQ_EMPTY(&cabq->axq_q)) return; if (sc->sc_isedma) ath_beacon_cabq_start_edma(sc); else ath_beacon_cabq_start_legacy(sc); } struct ath_buf * ath_beacon_generate(struct ath_softc *sc, struct ieee80211vap *vap) { struct ath_vap *avp = ATH_VAP(vap); struct ath_txq *cabq = sc->sc_cabq; struct ath_buf *bf; struct mbuf *m; int nmcastq, error; KASSERT(vap->iv_state >= IEEE80211_S_RUN, ("not running, state %d", vap->iv_state)); KASSERT(avp->av_bcbuf != NULL, ("no beacon buffer")); /* * Update dynamic beacon contents. If this returns * non-zero then we need to remap the memory because * the beacon frame changed size (probably because * of the TIM bitmap). */ bf = avp->av_bcbuf; m = bf->bf_m; /* XXX lock mcastq? */ nmcastq = avp->av_mcastq.axq_depth; if (ieee80211_beacon_update(bf->bf_node, m, nmcastq)) { /* XXX too conservative? */ bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { if_printf(vap->iv_ifp, "%s: bus_dmamap_load_mbuf_sg failed, error %u\n", __func__, error); return NULL; } } if ((vap->iv_bcn_off.bo_tim[4] & 1) && cabq->axq_depth) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: cabq did not drain, mcastq %u cabq %u\n", __func__, nmcastq, cabq->axq_depth); sc->sc_stats.ast_cabq_busy++; if (sc->sc_nvaps > 1 && sc->sc_stagbeacons) { /* * CABQ traffic from a previous vap is still pending. * We must drain the q before this beacon frame goes * out as otherwise this vap's stations will get cab * frames from a different vap. * XXX could be slow causing us to miss DBA */ /* * XXX TODO: this doesn't stop CABQ DMA - it assumes * that since we're about to transmit a beacon, we've * already stopped transmitting on the CABQ. But this * doesn't at all mean that the CABQ DMA QCU will * accept a new TXDP! So what, should we do a DMA * stop? What if it fails? * * More thought is required here. */ /* * XXX can we even stop TX DMA here? Check what the * reference driver does for cabq for beacons, given * that stopping TX requires RX is paused. */ ath_tx_draintxq(sc, cabq); } } ath_beacon_setup(sc, bf); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE); /* * Enable the CAB queue before the beacon queue to * insure cab frames are triggered by this beacon. */ if (vap->iv_bcn_off.bo_tim[4] & 1) { /* NB: only at DTIM */ ATH_TXQ_LOCK(&avp->av_mcastq); if (nmcastq) { struct ath_buf *bfm, *bfc_last; /* * Move frames from the s/w mcast q to the h/w cab q. * * XXX TODO: if we chain together multiple VAPs * worth of CABQ traffic, should we keep the * MORE data bit set on the last frame of each * intermediary VAP (ie, only clear the MORE * bit of the last frame on the last vap?) */ bfm = TAILQ_FIRST(&avp->av_mcastq.axq_q); ATH_TXQ_LOCK(cabq); /* * If there's already a frame on the CABQ, we * need to link to the end of the last frame. * We can't use axq_link here because * EDMA descriptors require some recalculation * (checksum) to occur. */ bfc_last = ATH_TXQ_LAST(cabq, axq_q_s); if (bfc_last != NULL) { ath_hal_settxdesclink(sc->sc_ah, bfc_last->bf_lastds, bfm->bf_daddr); } ath_txqmove(cabq, &avp->av_mcastq); ATH_TXQ_UNLOCK(cabq); /* * XXX not entirely accurate, in case a mcast * queue frame arrived before we grabbed the TX * lock. 
*/ sc->sc_stats.ast_cabq_xmit += nmcastq; } ATH_TXQ_UNLOCK(&avp->av_mcastq); } return bf; } void ath_beacon_start_adhoc(struct ath_softc *sc, struct ieee80211vap *vap) { struct ath_vap *avp = ATH_VAP(vap); struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; struct mbuf *m; int error; KASSERT(avp->av_bcbuf != NULL, ("no beacon buffer")); /* * Update dynamic beacon contents. If this returns * non-zero then we need to remap the memory because * the beacon frame changed size (probably because * of the TIM bitmap). */ bf = avp->av_bcbuf; m = bf->bf_m; if (ieee80211_beacon_update(bf->bf_node, m, 0)) { /* XXX too conservative? */ bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { if_printf(vap->iv_ifp, "%s: bus_dmamap_load_mbuf_sg failed, error %u\n", __func__, error); return; } } ath_beacon_setup(sc, bf); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE); /* NB: caller is known to have already stopped tx dma */ ath_hal_puttxbuf(ah, sc->sc_bhalq, bf->bf_daddr); ath_hal_txstart(ah, sc->sc_bhalq); } /* * Reclaim beacon resources and return buffer to the pool. */ void ath_beacon_return(struct ath_softc *sc, struct ath_buf *bf) { DPRINTF(sc, ATH_DEBUG_NODE, "%s: free bf=%p, bf_m=%p, bf_node=%p\n", __func__, bf, bf->bf_m, bf->bf_node); if (bf->bf_m != NULL) { bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); m_freem(bf->bf_m); bf->bf_m = NULL; } if (bf->bf_node != NULL) { ieee80211_free_node(bf->bf_node); bf->bf_node = NULL; } TAILQ_INSERT_TAIL(&sc->sc_bbuf, bf, bf_list); } /* * Reclaim beacon resources. */ void ath_beacon_free(struct ath_softc *sc) { struct ath_buf *bf; TAILQ_FOREACH(bf, &sc->sc_bbuf, bf_list) { DPRINTF(sc, ATH_DEBUG_NODE, "%s: free bf=%p, bf_m=%p, bf_node=%p\n", __func__, bf, bf->bf_m, bf->bf_node); if (bf->bf_m != NULL) { bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); m_freem(bf->bf_m); bf->bf_m = NULL; } if (bf->bf_node != NULL) { ieee80211_free_node(bf->bf_node); bf->bf_node = NULL; } } } /* * Configure the beacon and sleep timers. * * When operating as an AP this resets the TSF and sets * up the hardware to notify us when we need to issue beacons. * * When operating in station mode this sets up the beacon * timers according to the timestamp of the last received * beacon and the current TSF, configures PCF and DTIM * handling, programs the sleep registers so the hardware * will wakeup in time to receive beacons, and configures * the beacon miss handling so we'll receive a BMISS * interrupt when we stop seeing beacons from the AP * we've associated with. */ void ath_beacon_config(struct ath_softc *sc, struct ieee80211vap *vap) { #define TSF_TO_TU(_h,_l) \ ((((u_int32_t)(_h)) << 22) | (((u_int32_t)(_l)) >> 10)) #define FUDGE 2 struct ath_hal *ah = sc->sc_ah; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; u_int32_t nexttbtt, intval, tsftu; u_int32_t nexttbtt_u8, intval_u8; u_int64_t tsf, tsf_beacon; if (vap == NULL) vap = TAILQ_FIRST(&ic->ic_vaps); /* XXX */ /* * Just ensure that we aren't being called when the last * VAP is destroyed. 
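 *
 * (As a worked example of the TSF_TO_TU() macro defined above: the
 * TSF is a 64-bit microsecond counter and a TU is 1024 microseconds,
 * so TSF_TO_TU(h, l) is the full 64-bit TSF divided by 1024,
 * truncated to 32 bits.  E.g. for a hypothetical tsf = 0x100000400,
 * h = 0x1 and l = 0x400, giving (0x1 << 22) | (0x400 >> 10) =
 * 0x400001 TU.)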
*/ if (vap == NULL) { device_printf(sc->sc_dev, "%s: called with no VAPs\n", __func__); return; } ni = ieee80211_ref_node(vap->iv_bss); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); /* extract tstamp from last beacon and convert to TU */ nexttbtt = TSF_TO_TU(le32dec(ni->ni_tstamp.data + 4), le32dec(ni->ni_tstamp.data)); tsf_beacon = ((uint64_t) le32dec(ni->ni_tstamp.data + 4)) << 32; tsf_beacon |= le32dec(ni->ni_tstamp.data); if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) { /* * For multi-bss ap/mesh support beacons are either staggered * evenly over N slots or burst together. For the former * arrange for the SWBA to be delivered for each slot. * Slots that are not occupied will generate nothing. */ /* NB: the beacon interval is kept internally in TU's */ intval = ni->ni_intval & HAL_BEACON_PERIOD; if (sc->sc_stagbeacons) intval /= ATH_BCBUF; } else { /* NB: the beacon interval is kept internally in TU's */ intval = ni->ni_intval & HAL_BEACON_PERIOD; } + + /* + * Note: rounding up to the next intval can cause problems with + * bad APs when we're in powersave mode. + * + * In STA mode with powersave enabled, beacons are only received + * whenever the beacon timer fires to wake up the hardware. + * Now, if this is rounded up to the next intval, it assumes + * that the AP has started transmitting beacons at TSF values that + * are multiples of intval, versus say being 25 TU off. + * + * The specification (802.11-2012 10.1.3.2 - Beacon Generation in + * Infrastructure Networks) requires APs be beaconing at a + * multiple of intval. So, if bintval=100, then we shouldn't + * get beacons at intervals other than around multiples of 100. + */ if (nexttbtt == 0) /* e.g. for ap mode */ nexttbtt = intval; - else if (intval) /* NB: can be 0 for monitor mode */ + else nexttbtt = roundup(nexttbtt, intval); + DPRINTF(sc, ATH_DEBUG_BEACON, "%s: nexttbtt %u intval %u (%u)\n", __func__, nexttbtt, intval, ni->ni_intval); if (ic->ic_opmode == IEEE80211_M_STA && !sc->sc_swbmiss) { HAL_BEACON_STATE bs; int dtimperiod, dtimcount; int cfpperiod, cfpcount; /* * Setup dtim and cfp parameters according to * last beacon we received (which may be none). */ dtimperiod = ni->ni_dtim_period; if (dtimperiod <= 0) /* NB: 0 if not known */ dtimperiod = 1; dtimcount = ni->ni_dtim_count; if (dtimcount >= dtimperiod) /* NB: sanity check */ dtimcount = 0; /* XXX? */ cfpperiod = 1; /* NB: no PCF support yet */ cfpcount = 0; /* * Pull nexttbtt forward to reflect the current * TSF and calculate dtim+cfp state for the result. */ tsf = ath_hal_gettsf64(ah); tsftu = TSF_TO_TU(tsf>>32, tsf) + FUDGE; DPRINTF(sc, ATH_DEBUG_BEACON, "%s: beacon tsf=%llu, hw tsf=%llu, nexttbtt=%u, tsftu=%u\n", __func__, (unsigned long long) tsf_beacon, (unsigned long long) tsf, nexttbtt, tsftu); DPRINTF(sc, ATH_DEBUG_BEACON, "%s: beacon tsf=%llu, hw tsf=%llu, tsf delta=%lld\n", __func__, (unsigned long long) tsf_beacon, (unsigned long long) tsf, (long long) tsf - (long long) tsf_beacon); DPRINTF(sc, ATH_DEBUG_BEACON, "%s: nexttbtt=%llu, beacon tsf delta=%lld\n", __func__, (unsigned long long) nexttbtt, (long long) ((long long) nexttbtt * 1024LL) - (long long) tsf_beacon); /* XXX cfpcount?
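 *
 * A worked example of the catch-up logic below (hypothetical
 * numbers): with nexttbtt = 300 TU, tsftu = 1234 TU and
 * intval = 100 TU, the remainder is (1234 - 300) % 100 = 34, so
 * nexttbtt becomes 1234 - 34 + 100 = 1300 TU - the next TBTT
 * after the current TSF that preserves the original beacon
 * phase.  countdiff is then (1300 - 300) / 100 = 10 elapsed
 * beacon intervals, which advances the DTIM count modulo
 * dtimperiod.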
*/ if (nexttbtt > tsftu) { uint32_t countdiff, oldtbtt, remainder; oldtbtt = nexttbtt; remainder = (nexttbtt - tsftu) % intval; nexttbtt = tsftu + remainder; countdiff = (oldtbtt - nexttbtt) / intval % dtimperiod; if (dtimcount > countdiff) { dtimcount -= countdiff; } else { dtimcount += dtimperiod - countdiff; } } else { //nexttbtt <= tsftu uint32_t countdiff, oldtbtt, remainder; oldtbtt = nexttbtt; remainder = (tsftu - nexttbtt) % intval; nexttbtt = tsftu - remainder + intval; countdiff = (nexttbtt - oldtbtt) / intval % dtimperiod; if (dtimcount > countdiff) { dtimcount -= countdiff; } else { dtimcount += dtimperiod - countdiff; } } DPRINTF(sc, ATH_DEBUG_BEACON, "%s: adj nexttbtt=%llu, rx tsf delta=%lld\n", __func__, (unsigned long long) nexttbtt, (long long) ((long long)nexttbtt * 1024LL) - (long long)tsf); memset(&bs, 0, sizeof(bs)); bs.bs_intval = intval; bs.bs_nexttbtt = nexttbtt; bs.bs_dtimperiod = dtimperiod*intval; bs.bs_nextdtim = bs.bs_nexttbtt + dtimcount*intval; bs.bs_cfpperiod = cfpperiod*bs.bs_dtimperiod; bs.bs_cfpnext = bs.bs_nextdtim + cfpcount*bs.bs_dtimperiod; bs.bs_cfpmaxduration = 0; #if 0 /* * The 802.11 layer records the offset to the DTIM * bitmap while receiving beacons; use it here to * enable h/w detection of our AID being marked in * the bitmap vector (to indicate frames for us are * pending at the AP). * XXX do DTIM handling in s/w to WAR old h/w bugs * XXX enable based on h/w rev for newer chips */ bs.bs_timoffset = ni->ni_timoff; #endif /* * Calculate the number of consecutive beacons to miss * before taking a BMISS interrupt. * Note that we clamp the result to at most 10 beacons. */ bs.bs_bmissthreshold = vap->iv_bmissthreshold; if (bs.bs_bmissthreshold > 10) bs.bs_bmissthreshold = 10; else if (bs.bs_bmissthreshold <= 0) bs.bs_bmissthreshold = 1; /* * Calculate sleep duration. The configuration is * given in ms. We insure a multiple of the beacon * period is used. Also, if the sleep duration is * greater than the DTIM period then it makes sense * to make it a multiple of that. * * XXX fixed at 100ms */ bs.bs_sleepduration = roundup(IEEE80211_MS_TO_TU(100), bs.bs_intval); if (bs.bs_sleepduration > bs.bs_dtimperiod) bs.bs_sleepduration = roundup(bs.bs_sleepduration, bs.bs_dtimperiod); DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf %ju tsf:tu %u intval %u nexttbtt %u dtim %u " "nextdtim %u bmiss %u sleep %u cfp:period %u " "maxdur %u next %u timoffset %u\n" , __func__ , tsf , tsftu , bs.bs_intval , bs.bs_nexttbtt , bs.bs_dtimperiod , bs.bs_nextdtim , bs.bs_bmissthreshold , bs.bs_sleepduration , bs.bs_cfpperiod , bs.bs_cfpmaxduration , bs.bs_cfpnext , bs.bs_timoffset ); ath_hal_intrset(ah, 0); ath_hal_beacontimers(ah, &bs); sc->sc_imask |= HAL_INT_BMISS; ath_hal_intrset(ah, sc->sc_imask); } else { ath_hal_intrset(ah, 0); if (nexttbtt == intval) intval |= HAL_BEACON_RESET_TSF; if (ic->ic_opmode == IEEE80211_M_IBSS) { /* * In IBSS mode enable the beacon timers but only * enable SWBA interrupts if we need to manually * prepare beacon frames. Otherwise we use a * self-linked tx descriptor and let the hardware * deal with things. */ intval |= HAL_BEACON_ENA; if (!sc->sc_hasveol) sc->sc_imask |= HAL_INT_SWBA; if ((intval & HAL_BEACON_RESET_TSF) == 0) { /* * Pull nexttbtt forward to reflect * the current TSF.
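 *
 * E.g. (hypothetical numbers): with nexttbtt = 100 TU,
 * tsftu = 12345 TU and intval = 100 TU, the loop below steps
 * nexttbtt forward one beacon interval at a time until it
 * reaches 12400 TU, the first TBTT at or after the current TSF.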
*/ tsf = ath_hal_gettsf64(ah); tsftu = TSF_TO_TU(tsf>>32, tsf) + FUDGE; do { nexttbtt += intval; } while (nexttbtt < tsftu); } ath_beaconq_config(sc); } else if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) { /* * In AP/mesh mode we enable the beacon timers * and SWBA interrupts to prepare beacon frames. */ intval |= HAL_BEACON_ENA; sc->sc_imask |= HAL_INT_SWBA; /* beacon prepare */ ath_beaconq_config(sc); } /* * Now dirty things because for now, the EDMA HAL has * nexttbtt and intval is TU/8. */ if (sc->sc_isedma) { nexttbtt_u8 = (nexttbtt << 3); intval_u8 = (intval << 3); if (intval & HAL_BEACON_ENA) intval_u8 |= HAL_BEACON_ENA; if (intval & HAL_BEACON_RESET_TSF) intval_u8 |= HAL_BEACON_RESET_TSF; ath_hal_beaconinit(ah, nexttbtt_u8, intval_u8); } else ath_hal_beaconinit(ah, nexttbtt, intval); sc->sc_bmisscount = 0; ath_hal_intrset(ah, sc->sc_imask); /* * When using a self-linked beacon descriptor in * ibss mode load it once here. */ if (ic->ic_opmode == IEEE80211_M_IBSS && sc->sc_hasveol) ath_beacon_start_adhoc(sc, vap); } ieee80211_free_node(ni); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); #undef FUDGE #undef TSF_TO_TU } Index: projects/clang391-import/sys/dev/ath/if_ath_ioctl.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_ioctl.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_ioctl.c (revision 309263) @@ -1,307 +1,309 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. 
*/ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include /* * ioctl() related pieces. * * Some subsystems (eg spectral, dfs) have their own ioctl method which * we call. */ /* * Fetch the rate control statistics for the given node. */ static int ath_ioctl_ratestats(struct ath_softc *sc, struct ath_rateioctl *rs) { struct ath_node *an; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; int error = 0; /* Perform a lookup on the given node */ ni = ieee80211_find_node(&ic->ic_sta, rs->is_u.macaddr); if (ni == NULL) { error = EINVAL; goto bad; } /* Lock the ath_node */ an = ATH_NODE(ni); ATH_NODE_LOCK(an); /* Fetch the rate control stats for this node */ error = ath_rate_fetch_node_stats(sc, an, rs); /* No matter what happens here, just drop through */ /* Unlock the ath_node */ ATH_NODE_UNLOCK(an); /* Unref the node */ ieee80211_node_decref(ni); bad: return (error); } #ifdef ATH_DIAGAPI /* * Diagnostic interface to the HAL. This is used by various * tools to do things like retrieve register contents for * debugging. The mechanism is intentionally opaque so that * it can change frequently w/o concern for compatibility. */ static int ath_ioctl_diag(struct ath_softc *sc, struct ath_diag *ad) { struct ath_hal *ah = sc->sc_ah; u_int id = ad->ad_id & ATH_DIAG_ID; void *indata = NULL; void *outdata = NULL; u_int32_t insize = ad->ad_in_size; u_int32_t outsize = ad->ad_out_size; int error = 0; if (ad->ad_id & ATH_DIAG_IN) { /* * Copy in data. */ indata = malloc(insize, M_TEMP, M_NOWAIT); if (indata == NULL) { error = ENOMEM; goto bad; } error = copyin(ad->ad_in_data, indata, insize); if (error) goto bad; } if (ad->ad_id & ATH_DIAG_DYN) { /* * Allocate a buffer for the results (otherwise the HAL * returns a pointer to a buffer where we can read the * results). Note that we depend on the HAL leaving this * pointer for us to use below in reclaiming the buffer; * may want to be more defensive. 
*/ outdata = malloc(outsize, M_TEMP, M_NOWAIT); if (outdata == NULL) { error = ENOMEM; goto bad; } } ATH_LOCK(sc); if (id != HAL_DIAG_REGS) ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); if (ath_hal_getdiagstate(ah, id, indata, insize, &outdata, &outsize)) { if (outsize < ad->ad_out_size) ad->ad_out_size = outsize; if (outdata != NULL) error = copyout(outdata, ad->ad_out_data, ad->ad_out_size); } else { error = EINVAL; } ATH_LOCK(sc); if (id != HAL_DIAG_REGS) ath_power_restore_power_state(sc); ATH_UNLOCK(sc); bad: if ((ad->ad_id & ATH_DIAG_IN) && indata != NULL) free(indata, M_TEMP); if ((ad->ad_id & ATH_DIAG_DYN) && outdata != NULL) free(outdata, M_TEMP); return error; } #endif /* ATH_DIAGAPI */ int ath_ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ifreq *ifr = data; struct ath_softc *sc = ic->ic_softc; switch (cmd) { case SIOCGATHSTATS: { struct ieee80211vap *vap; struct ifnet *ifp; const HAL_RATE_TABLE *rt; /* NB: embed these numbers to get a consistent view */ sc->sc_stats.ast_tx_packets = 0; sc->sc_stats.ast_rx_packets = 0; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; sc->sc_stats.ast_tx_packets += ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); sc->sc_stats.ast_rx_packets += ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); } sc->sc_stats.ast_tx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgtxrssi); sc->sc_stats.ast_rx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgrssi); #ifdef IEEE80211_SUPPORT_TDMA sc->sc_stats.ast_tdma_tsfadjp = TDMA_AVG(sc->sc_avgtsfdeltap); sc->sc_stats.ast_tdma_tsfadjm = TDMA_AVG(sc->sc_avgtsfdeltam); #endif rt = sc->sc_currates; sc->sc_stats.ast_tx_rate = rt->info[sc->sc_txrix].dot11Rate &~ IEEE80211_RATE_BASIC; if (rt->info[sc->sc_txrix].phy & IEEE80211_T_HT) sc->sc_stats.ast_tx_rate |= IEEE80211_RATE_MCS; return copyout(&sc->sc_stats, ifr->ifr_data, sizeof (sc->sc_stats)); } case SIOCGATHAGSTATS: return copyout(&sc->sc_aggr_stats, ifr->ifr_data, sizeof (sc->sc_aggr_stats)); case SIOCZATHSTATS: { int error; error = priv_check(curthread, PRIV_DRIVER); if (error == 0) { memset(&sc->sc_stats, 0, sizeof(sc->sc_stats)); memset(&sc->sc_aggr_stats, 0, sizeof(sc->sc_aggr_stats)); memset(&sc->sc_intr_stats, 0, sizeof(sc->sc_intr_stats)); } return (error); } #ifdef ATH_DIAGAPI case SIOCGATHDIAG: return (ath_ioctl_diag(sc, data)); case SIOCGATHPHYERR: return (ath_ioctl_phyerr(sc, data)); #endif case SIOCGATHSPECTRAL: return (ath_ioctl_spectral(sc, data)); case SIOCGATHNODERATESTATS: return (ath_ioctl_ratestats(sc, data)); + case SIOCGATHBTCOEX: + return (ath_btcoex_ioctl(sc, data)); default: /* * This signals the net80211 layer that we didn't handle this * ioctl. */ return (ENOTTY); } } Index: projects/clang391-import/sys/dev/ath/if_ath_misc.h =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_misc.h (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_misc.h (revision 309263) @@ -1,142 +1,150 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ #ifndef __IF_ATH_MISC_H__ #define __IF_ATH_MISC_H__ /* * This is where definitions for "public things" in if_ath.c * will go for the time being. * * Anything in here should eventually be moved out of if_ath.c * and into something else. */ extern int ath_rxbuf; extern int ath_txbuf; extern int ath_txbuf_mgmt; extern int ath_tx_findrix(const struct ath_softc *sc, uint8_t rate); extern struct ath_buf * ath_getbuf(struct ath_softc *sc, ath_buf_type_t btype); extern struct ath_buf * _ath_getbuf_locked(struct ath_softc *sc, ath_buf_type_t btype); extern struct ath_buf * ath_buf_clone(struct ath_softc *sc, struct ath_buf *bf); /* XXX change this to NULL the buffer pointer? */ extern void ath_freebuf(struct ath_softc *sc, struct ath_buf *bf); extern void ath_returnbuf_head(struct ath_softc *sc, struct ath_buf *bf); extern void ath_returnbuf_tail(struct ath_softc *sc, struct ath_buf *bf); extern int ath_reset(struct ath_softc *, ATH_RESET_TYPE); extern void ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail); extern void ath_tx_update_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_rc_series *rc, struct ath_tx_status *ts, int frmlen, int nframes, int nbad); extern int ath_hal_gethangstate(struct ath_hal *ah, uint32_t mask, uint32_t *hangs); extern void ath_tx_freebuf(struct ath_softc *sc, struct ath_buf *bf, int status); extern void ath_txq_freeholdingbuf(struct ath_softc *sc, struct ath_txq *txq); extern void ath_txqmove(struct ath_txq *dst, struct ath_txq *src); extern void ath_mode_init(struct ath_softc *sc); extern void ath_setdefantenna(struct ath_softc *sc, u_int antenna); extern void ath_setslottime(struct ath_softc *sc); extern void ath_legacy_attach_comp_func(struct ath_softc *sc); extern void ath_tx_draintxq(struct ath_softc *sc, struct ath_txq *txq); extern void ath_legacy_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type); extern void ath_tx_process_buf_completion(struct ath_softc *sc, struct ath_txq *txq, struct ath_tx_status *ts, struct ath_buf *bf); extern int ath_stoptxdma(struct ath_softc *sc); extern void ath_tx_update_tim(struct ath_softc *sc, struct ieee80211_node *ni, int enable); /* * This is only here so that the RX proc function can call it. * It's very likely that the "start TX after RX" call should be * done via something in if_ath.c, moving "rx tasklet" into * if_ath.c and do the ath_start() call there. Once that's done, * we can kill this. 
*/ extern void ath_start(struct ifnet *ifp); extern void ath_start_task(void *arg, int npending); extern void ath_tx_dump(struct ath_softc *sc, struct ath_txq *txq); /* * Power state tracking. */ -extern void _ath_power_setpower(struct ath_softc *sc, int power_state, const char *file, int line); -extern void _ath_power_set_selfgen(struct ath_softc *sc, int power_state, const char *file, int line); -extern void _ath_power_set_power_state(struct ath_softc *sc, int power_state, const char *file, int line); -extern void _ath_power_restore_power_state(struct ath_softc *sc, const char *file, int line); +extern void _ath_power_setpower(struct ath_softc *sc, int power_state, + int selfgen, const char *file, int line); +extern void _ath_power_set_selfgen(struct ath_softc *sc, + int power_state, const char *file, int line); +extern void _ath_power_set_power_state(struct ath_softc *sc, + int power_state, const char *file, int line); +extern void _ath_power_restore_power_state(struct ath_softc *sc, + const char *file, int line); -#define ath_power_setpower(sc, ps) _ath_power_setpower(sc, ps, __FILE__, __LINE__) -#define ath_power_setselfgen(sc, ps) _ath_power_set_selfgen(sc, ps, __FILE__, __LINE__) -#define ath_power_set_power_state(sc, ps) _ath_power_set_power_state(sc, ps, __FILE__, __LINE__) -#define ath_power_restore_power_state(sc) _ath_power_restore_power_state(sc, __FILE__, __LINE__) +#define ath_power_setpower(sc, ps, sg) _ath_power_setpower(sc, ps, sg, \ + __FILE__, __LINE__) +#define ath_power_setselfgen(sc, ps) _ath_power_set_selfgen(sc, ps, \ + __FILE__, __LINE__) +#define ath_power_set_power_state(sc, ps) \ + _ath_power_set_power_state(sc, ps, __FILE__, __LINE__) +#define ath_power_restore_power_state(sc) \ + _ath_power_restore_power_state(sc, __FILE__, __LINE__) /* * Kick the frame TX task. */ static inline void ath_tx_kick(struct ath_softc *sc) { /* XXX NULL for now */ } /* * Kick the software TX queue task. */ static inline void ath_tx_swq_kick(struct ath_softc *sc) { taskqueue_enqueue(sc->sc_tq, &sc->sc_txqtask); } #endif Index: projects/clang391-import/sys/dev/ath/if_ath_rx.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_rx.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_rx.c (revision 309263) @@ -1,1508 +1,1513 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif #ifdef ATH_DEBUG_ALQ #include #endif #include /* * Calculate the receive filter according to the * operating mode and state: * * o always accept unicast, broadcast, and multicast traffic * o accept PHY error frames when hardware doesn't have MIB support * to count and we need them for ANI (sta mode only until recently) * and we are not scanning (ANI is disabled) * NB: older hal's add rx filter bits out of sight and we need to * blindly preserve them * o probe request frames are accepted only when operating in * hostap, adhoc, mesh, or monitor modes * o enable promiscuous mode * - when in monitor mode * - if interface marked PROMISC (assumes bridge setting is filtered) * o accept beacons: * - when operating in station mode for collecting rssi data when * the station is otherwise quiet, or * - when operating in adhoc mode so the 802.11 layer creates * node table entries for peers, * - when scanning * - when doing s/w beacon miss (e.g. for ap+sta) * - when operating in ap mode in 11g to detect overlapping bss that * require protection * - when operating in mesh mode to detect neighbors * o accept control frames: * - when in monitor mode * XXX HT protection for 11n */ u_int32_t ath_calcrxfilter(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; u_int32_t rfilt; rfilt = HAL_RX_FILTER_UCAST | HAL_RX_FILTER_BCAST | HAL_RX_FILTER_MCAST; if (!sc->sc_needmib && !sc->sc_scanning) rfilt |= HAL_RX_FILTER_PHYERR; if (ic->ic_opmode != IEEE80211_M_STA) rfilt |= HAL_RX_FILTER_PROBEREQ; /* XXX ic->ic_monvaps != 0? */ if (ic->ic_opmode == IEEE80211_M_MONITOR || ic->ic_promisc > 0) rfilt |= HAL_RX_FILTER_PROM; /* * Only listen to all beacons if we're scanning. * * Otherwise we only really need to hear beacons from * our own BSSID. * * IBSS? software beacon miss? Just receive all beacons. * We need to hear beacons/probe requests from everyone so * we can merge ibss. 
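 *
 * As an illustrative example (hypothetical configuration): an
 * associated, non-scanning STA on a chip with "mybeacon" filter
 * support ends up with roughly
 *
 *	rfilt = HAL_RX_FILTER_UCAST | HAL_RX_FILTER_BCAST |
 *	    HAL_RX_FILTER_MCAST | HAL_RX_FILTER_PHYERR |
 *	    HAL_RX_FILTER_MYBEACON;
 *
 * i.e. it hears only beacons matching its own BSSID rather than
 * every beacon on the channel.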
*/ if (ic->ic_opmode == IEEE80211_M_IBSS || sc->sc_swbmiss) { rfilt |= HAL_RX_FILTER_BEACON; } else if (ic->ic_opmode == IEEE80211_M_STA) { if (sc->sc_do_mybeacon && ! sc->sc_scanning) { rfilt |= HAL_RX_FILTER_MYBEACON; } else { /* scanning, non-mybeacon chips */ rfilt |= HAL_RX_FILTER_BEACON; } } /* * NB: We don't recalculate the rx filter when * ic_protmode changes; otherwise we could do * this only when ic_protmode != NONE. */ if (ic->ic_opmode == IEEE80211_M_HOSTAP && IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) rfilt |= HAL_RX_FILTER_BEACON; /* * Enable hardware PS-POLL RX only for hostap mode; * STA mode sends PS-POLL frames but never * receives them. */ if (ath_hal_getcapability(sc->sc_ah, HAL_CAP_PSPOLL, 0, NULL) == HAL_OK && ic->ic_opmode == IEEE80211_M_HOSTAP) rfilt |= HAL_RX_FILTER_PSPOLL; if (sc->sc_nmeshvaps) { rfilt |= HAL_RX_FILTER_BEACON; if (sc->sc_hasbmatch) rfilt |= HAL_RX_FILTER_BSSID; else rfilt |= HAL_RX_FILTER_PROM; } if (ic->ic_opmode == IEEE80211_M_MONITOR) rfilt |= HAL_RX_FILTER_CONTROL; /* * Enable RX of compressed BAR frames only when doing * 802.11n. Required for A-MPDU. */ if (IEEE80211_IS_CHAN_HT(ic->ic_curchan)) rfilt |= HAL_RX_FILTER_COMPBAR; /* * Enable radar PHY errors if requested by the * DFS module. */ if (sc->sc_dodfs) rfilt |= HAL_RX_FILTER_PHYRADAR; /* * Enable spectral PHY errors if requested by the * spectral module. */ if (sc->sc_dospectral) rfilt |= HAL_RX_FILTER_PHYRADAR; DPRINTF(sc, ATH_DEBUG_MODE, "%s: RX filter 0x%x, %s\n", __func__, rfilt, ieee80211_opmode_name[ic->ic_opmode]); return rfilt; } static int ath_legacy_rxbuf_init(struct ath_softc *sc, struct ath_buf *bf) { struct ath_hal *ah = sc->sc_ah; int error; struct mbuf *m; struct ath_desc *ds; /* XXX TODO: ATH_RX_LOCK_ASSERT(sc); */ m = bf->bf_m; if (m == NULL) { /* * NB: by assigning a page to the rx dma buffer we * implicitly satisfy the Atheros requirement that * this buffer be cache-line-aligned and sized to be * multiple of the cache line size. Not doing this * causes weird stuff to happen (for the 5210 at least). */ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { DPRINTF(sc, ATH_DEBUG_ANY, "%s: no mbuf/cluster\n", __func__); sc->sc_stats.ast_rx_nombuf++; return ENOMEM; } m->m_pkthdr.len = m->m_len = m->m_ext.ext_size; error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { DPRINTF(sc, ATH_DEBUG_ANY, "%s: bus_dmamap_load_mbuf_sg failed; error %d\n", __func__, error); sc->sc_stats.ast_rx_busdma++; m_freem(m); return error; } KASSERT(bf->bf_nseg == 1, ("multi-segment packet; nseg %u", bf->bf_nseg)); bf->bf_m = m; } bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREREAD); /* * Setup descriptors. For receive we always terminate * the descriptor list with a self-linked entry so we'll * not get overrun under high load (as can happen with a * 5212 when ANI processing enables PHY error frames). * * To insure the last descriptor is self-linked we create * each descriptor as self-linked and add it to the end. As * each additional descriptor is added the previous self-linked * entry is ``fixed'' naturally. This should be safe even * if DMA is happening. When processing RX interrupts we * never remove/process the last, self-linked, entry on the * descriptor list. This insures the hardware always has * someplace to write a new frame. */ /* * 11N: we can no longer afford to self link the last descriptor. * MAC acknowledges BA status as long as it copies frames to host * buffer (or rx fifo). 
This can incorrectly acknowledge packets * to a sender if last desc is self-linked. */ ds = bf->bf_desc; if (sc->sc_rxslink) ds->ds_link = bf->bf_daddr; /* link to self */ else ds->ds_link = 0; /* terminate the list */ ds->ds_data = bf->bf_segs[0].ds_addr; ath_hal_setuprxdesc(ah, ds , m->m_len /* buffer size */ , 0 ); if (sc->sc_rxlink != NULL) *sc->sc_rxlink = bf->bf_daddr; sc->sc_rxlink = &ds->ds_link; return 0; } /* * Intercept management frames to collect beacon rssi data * and to do ibss merges. */ void ath_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m, int subtype, const struct ieee80211_rx_stats *rxs, int rssi, int nf) { struct ieee80211vap *vap = ni->ni_vap; struct ath_softc *sc = vap->iv_ic->ic_softc; uint64_t tsf_beacon_old, tsf_beacon; uint64_t nexttbtt; int64_t tsf_delta; int32_t tsf_delta_bmiss; int32_t tsf_remainder; uint64_t tsf_beacon_target; int tsf_intval; tsf_beacon_old = ((uint64_t) le32dec(ni->ni_tstamp.data + 4)) << 32; tsf_beacon_old |= le32dec(ni->ni_tstamp.data); #define TU_TO_TSF(_tu) (((u_int64_t)(_tu)) << 10) tsf_intval = 1; if (ni->ni_intval > 0) { tsf_intval = TU_TO_TSF(ni->ni_intval); } #undef TU_TO_TSF /* * Call up first so subsequent work can use information * potentially stored in the node (e.g. for ibss merge). */ ATH_VAP(vap)->av_recv_mgmt(ni, m, subtype, rxs, rssi, nf); switch (subtype) { case IEEE80211_FC0_SUBTYPE_BEACON: /* * Only do the following processing if it's for * the current BSS. * * In scan and IBSS mode we receive all beacons, * which means we need to filter out stuff * that isn't for us or we'll end up constantly * trying to sync / merge to BSSes that aren't * actually us. */ if (IEEE80211_ADDR_EQ(ni->ni_bssid, vap->iv_bss->ni_bssid)) { /* update rssi statistics for use by the hal */ /* XXX unlocked check against vap->iv_bss? */ ATH_RSSI_LPF(sc->sc_halstats.ns_avgbrssi, rssi); tsf_beacon = ((uint64_t) le32dec(ni->ni_tstamp.data + 4)) << 32; tsf_beacon |= le32dec(ni->ni_tstamp.data); nexttbtt = ath_hal_getnexttbtt(sc->sc_ah); /* * Let's calculate the delta and remainder, so we can see * if the beacon timer from the AP is varying by more than * a few TU. (Which would be a huge, huge problem.) */ tsf_delta = (long long) tsf_beacon - (long long) tsf_beacon_old; tsf_delta_bmiss = tsf_delta / tsf_intval; /* * If our delta is greater than half the beacon interval, * let's round the bmiss value up to the next beacon * interval. Ie, we're running really, really early * on the next beacon. */ if (tsf_delta % tsf_intval > (tsf_intval / 2)) tsf_delta_bmiss ++; tsf_beacon_target = tsf_beacon_old + (((unsigned long long) tsf_delta_bmiss) * (long long) tsf_intval); /* * The remainder using '%' is between 0 .. intval-1. * If we're actually running too fast, then the remainder * will be some large number just under intval-1. * So we need to look at whether we're running * before or after the target beacon interval * and if we are, modify how we do the remainder * calculation. 
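 *
 * A worked example (hypothetical values): with bintval = 100 TU,
 * tsf_intval = 102400 usec.  If tsf_beacon_old = 0 and the new
 * tsf_beacon = 307100, then tsf_delta = 307100; the remainder
 * 307100 % 102400 = 102300 is more than half an interval, so
 * tsf_delta_bmiss rounds up to 3 and tsf_beacon_target = 307200.
 * The beacon arrived before the target, so below we compute
 * tsf_remainder = -(102400 - 102300) = -100: the AP is beaconing
 * 100 usec early.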
*/ if (tsf_beacon < tsf_beacon_target) { tsf_remainder = -(tsf_intval - ((tsf_beacon - tsf_beacon_old) % tsf_intval)); } else { tsf_remainder = (tsf_beacon - tsf_beacon_old) % tsf_intval; } - DPRINTF(sc, ATH_DEBUG_BEACON, "%s: old_tsf=%llu, new_tsf=%llu, target_tsf=%llu, delta=%lld, bmiss=%d, remainder=%d\n", + DPRINTF(sc, ATH_DEBUG_BEACON, "%s: old_tsf=%llu (%u), new_tsf=%llu (%u), target_tsf=%llu (%u), delta=%lld, bmiss=%d, remainder=%d\n", __func__, (unsigned long long) tsf_beacon_old, + (unsigned int) (tsf_beacon_old >> 10), (unsigned long long) tsf_beacon, + (unsigned int ) (tsf_beacon >> 10), (unsigned long long) tsf_beacon_target, + (unsigned int) (tsf_beacon_target >> 10), (long long) tsf_delta, tsf_delta_bmiss, tsf_remainder); - DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf=%llu, nexttbtt=%llu, delta=%d\n", + DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf=%llu (%u), nexttbtt=%llu (%u), delta=%d\n", __func__, (unsigned long long) tsf_beacon, + (unsigned int) (tsf_beacon >> 10), (unsigned long long) nexttbtt, + (unsigned int) (nexttbtt >> 10), (int32_t) tsf_beacon - (int32_t) nexttbtt + tsf_intval); /* We only do syncbeacon on STA VAPs; not on IBSS */ if (vap->iv_opmode == IEEE80211_M_STA && sc->sc_syncbeacon && ni == vap->iv_bss && (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP)) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: syncbeacon=1; syncing\n", __func__); /* * Resync beacon timers using the tsf of the beacon * frame we just received. */ ath_beacon_config(sc, vap); sc->sc_syncbeacon = 0; } } /* fall thru... */ case IEEE80211_FC0_SUBTYPE_PROBE_RESP: if (vap->iv_opmode == IEEE80211_M_IBSS && vap->iv_state == IEEE80211_S_RUN && ieee80211_ibss_merge_check(ni)) { uint32_t rstamp = sc->sc_lastrs->rs_tstamp; uint64_t tsf = ath_extend_tsf(sc, rstamp, ath_hal_gettsf64(sc->sc_ah)); /* * Handle ibss merge as needed; check the tsf on the * frame before attempting the merge. The 802.11 spec * says the station should change its bssid to match * the oldest station with the same ssid, where oldest * is determined by the tsf. Note that hardware * reconfiguration happens through callback to * ath_newstate as the state machine will go from * RUN -> RUN when this happens. */ if (le64toh(ni->ni_tstamp.tsf) >= tsf) { DPRINTF(sc, ATH_DEBUG_STATE, "ibss merge, rstamp %u tsf %ju " "tstamp %ju\n", rstamp, (uintmax_t)tsf, (uintmax_t)ni->ni_tstamp.tsf); (void) ieee80211_ibss_merge(ni); } } break; } } #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT static void ath_rx_tap_vendor(struct ath_softc *sc, struct mbuf *m, const struct ath_rx_status *rs, u_int64_t tsf, int16_t nf) { /* Fill in the extension bitmap */ sc->sc_rx_th.wr_ext_bitmap = htole32(1 << ATH_RADIOTAP_VENDOR_HEADER); /* Fill in the vendor header */ sc->sc_rx_th.wr_vh.vh_oui[0] = 0x7f; sc->sc_rx_th.wr_vh.vh_oui[1] = 0x03; sc->sc_rx_th.wr_vh.vh_oui[2] = 0x00; /* XXX what should this be?
*/ sc->sc_rx_th.wr_vh.vh_sub_ns = 0; sc->sc_rx_th.wr_vh.vh_skip_len = htole16(sizeof(struct ath_radiotap_vendor_hdr)); /* General version info */ sc->sc_rx_th.wr_v.vh_version = 1; sc->sc_rx_th.wr_v.vh_rx_chainmask = sc->sc_rxchainmask; /* rssi */ sc->sc_rx_th.wr_v.rssi_ctl[0] = rs->rs_rssi_ctl[0]; sc->sc_rx_th.wr_v.rssi_ctl[1] = rs->rs_rssi_ctl[1]; sc->sc_rx_th.wr_v.rssi_ctl[2] = rs->rs_rssi_ctl[2]; sc->sc_rx_th.wr_v.rssi_ext[0] = rs->rs_rssi_ext[0]; sc->sc_rx_th.wr_v.rssi_ext[1] = rs->rs_rssi_ext[1]; sc->sc_rx_th.wr_v.rssi_ext[2] = rs->rs_rssi_ext[2]; /* evm */ sc->sc_rx_th.wr_v.evm[0] = rs->rs_evm0; sc->sc_rx_th.wr_v.evm[1] = rs->rs_evm1; sc->sc_rx_th.wr_v.evm[2] = rs->rs_evm2; /* These are only populated from the AR9300 or later */ sc->sc_rx_th.wr_v.evm[3] = rs->rs_evm3; sc->sc_rx_th.wr_v.evm[4] = rs->rs_evm4; /* direction */ sc->sc_rx_th.wr_v.vh_flags = ATH_VENDOR_PKT_RX; /* RX rate */ sc->sc_rx_th.wr_v.vh_rx_hwrate = rs->rs_rate; /* RX flags */ sc->sc_rx_th.wr_v.vh_rs_flags = rs->rs_flags; if (rs->rs_isaggr) sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_ISAGGR; if (rs->rs_moreaggr) sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_MOREAGGR; /* phyerr info */ if (rs->rs_status & HAL_RXERR_PHY) { sc->sc_rx_th.wr_v.vh_phyerr_code = rs->rs_phyerr; sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_RXPHYERR; } else { sc->sc_rx_th.wr_v.vh_phyerr_code = 0xff; } sc->sc_rx_th.wr_v.vh_rs_status = rs->rs_status; sc->sc_rx_th.wr_v.vh_rssi = rs->rs_rssi; } #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ static void ath_rx_tap(struct ath_softc *sc, struct mbuf *m, const struct ath_rx_status *rs, u_int64_t tsf, int16_t nf) { #define CHAN_HT20 htole32(IEEE80211_CHAN_HT20) #define CHAN_HT40U htole32(IEEE80211_CHAN_HT40U) #define CHAN_HT40D htole32(IEEE80211_CHAN_HT40D) #define CHAN_HT (CHAN_HT20|CHAN_HT40U|CHAN_HT40D) const HAL_RATE_TABLE *rt; uint8_t rix; rt = sc->sc_currates; KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode)); rix = rt->rateCodeToIndex[rs->rs_rate]; sc->sc_rx_th.wr_rate = sc->sc_hwmap[rix].ieeerate; sc->sc_rx_th.wr_flags = sc->sc_hwmap[rix].rxflags; #ifdef AH_SUPPORT_AR5416 sc->sc_rx_th.wr_chan_flags &= ~CHAN_HT; if (rs->rs_status & HAL_RXERR_PHY) { /* * PHY error - make sure the channel flags * reflect the actual channel configuration, * not the received frame. 
*/ if (IEEE80211_IS_CHAN_HT40U(sc->sc_curchan)) sc->sc_rx_th.wr_chan_flags |= CHAN_HT40U; else if (IEEE80211_IS_CHAN_HT40D(sc->sc_curchan)) sc->sc_rx_th.wr_chan_flags |= CHAN_HT40D; else if (IEEE80211_IS_CHAN_HT20(sc->sc_curchan)) sc->sc_rx_th.wr_chan_flags |= CHAN_HT20; } else if (sc->sc_rx_th.wr_rate & IEEE80211_RATE_MCS) { /* HT rate */ struct ieee80211com *ic = &sc->sc_ic; if ((rs->rs_flags & HAL_RX_2040) == 0) sc->sc_rx_th.wr_chan_flags |= CHAN_HT20; else if (IEEE80211_IS_CHAN_HT40U(ic->ic_curchan)) sc->sc_rx_th.wr_chan_flags |= CHAN_HT40U; else sc->sc_rx_th.wr_chan_flags |= CHAN_HT40D; if ((rs->rs_flags & HAL_RX_GI) == 0) sc->sc_rx_th.wr_flags |= IEEE80211_RADIOTAP_F_SHORTGI; } #endif sc->sc_rx_th.wr_tsf = htole64(ath_extend_tsf(sc, rs->rs_tstamp, tsf)); if (rs->rs_status & HAL_RXERR_CRC) sc->sc_rx_th.wr_flags |= IEEE80211_RADIOTAP_F_BADFCS; /* XXX propagate other error flags from descriptor */ sc->sc_rx_th.wr_antnoise = nf; sc->sc_rx_th.wr_antsignal = nf + rs->rs_rssi; sc->sc_rx_th.wr_antenna = rs->rs_antenna; #undef CHAN_HT #undef CHAN_HT20 #undef CHAN_HT40U #undef CHAN_HT40D } static void ath_handle_micerror(struct ieee80211com *ic, struct ieee80211_frame *wh, int keyix) { struct ieee80211_node *ni; /* XXX recheck MIC to deal w/ chips that lie */ /* XXX discard MIC errors on !data frames */ ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh); if (ni != NULL) { ieee80211_notify_michael_failure(ni->ni_vap, wh, keyix); ieee80211_free_node(ni); } } /* * Process a single packet. * * The mbuf must already be synced, unmapped and removed from bf->bf_m * by this stage. * * The mbuf must be consumed by this routine - either passed up the * net80211 stack, put on the holding queue, or freed. */ int ath_rx_pkt(struct ath_softc *sc, struct ath_rx_status *rs, HAL_STATUS status, uint64_t tsf, int nf, HAL_RX_QUEUE qtype, struct ath_buf *bf, struct mbuf *m) { uint64_t rstamp; /* XXX TODO: make this an mbuf tag? */ struct ieee80211_rx_stats rxs; int len, type, i; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; int is_good = 0; struct ath_rx_edma *re = &sc->sc_rxedma[qtype]; /* * Calculate the correct 64 bit TSF given * the TSF64 register value and rs_tstamp. 
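 *
 * Conceptually (a sketch only; the valid timestamp width varies
 * by chip generation), ath_extend_tsf() splices the low-order
 * rs_tstamp bits into the current 64-bit TSF and backs off one
 * wrap period if the spliced value would land in the future:
 *
 *	rstamp = (tsf & ~mask) | (rs_tstamp & mask);
 *	if (rstamp > tsf)
 *		rstamp -= mask + 1;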
*/ rstamp = ath_extend_tsf(sc, rs->rs_tstamp, tsf); /* These aren't specifically errors */ #ifdef AH_SUPPORT_AR5416 if (rs->rs_flags & HAL_RX_GI) sc->sc_stats.ast_rx_halfgi++; if (rs->rs_flags & HAL_RX_2040) sc->sc_stats.ast_rx_2040++; if (rs->rs_flags & HAL_RX_DELIM_CRC_PRE) sc->sc_stats.ast_rx_pre_crc_err++; if (rs->rs_flags & HAL_RX_DELIM_CRC_POST) sc->sc_stats.ast_rx_post_crc_err++; if (rs->rs_flags & HAL_RX_DECRYPT_BUSY) sc->sc_stats.ast_rx_decrypt_busy_err++; if (rs->rs_flags & HAL_RX_HI_RX_CHAIN) sc->sc_stats.ast_rx_hi_rx_chain++; if (rs->rs_flags & HAL_RX_STBC) sc->sc_stats.ast_rx_stbc++; #endif /* AH_SUPPORT_AR5416 */ if (rs->rs_status != 0) { if (rs->rs_status & HAL_RXERR_CRC) sc->sc_stats.ast_rx_crcerr++; if (rs->rs_status & HAL_RXERR_FIFO) sc->sc_stats.ast_rx_fifoerr++; if (rs->rs_status & HAL_RXERR_PHY) { sc->sc_stats.ast_rx_phyerr++; /* Process DFS radar events */ if ((rs->rs_phyerr == HAL_PHYERR_RADAR) || (rs->rs_phyerr == HAL_PHYERR_FALSE_RADAR_EXT)) { /* Now pass it to the radar processing code */ ath_dfs_process_phy_err(sc, m, rstamp, rs); } /* Be suitably paranoid about receiving phy errors out of the stats array bounds */ if (rs->rs_phyerr < 64) sc->sc_stats.ast_rx_phy[rs->rs_phyerr]++; goto rx_error; /* NB: don't count in ierrors */ } if (rs->rs_status & HAL_RXERR_DECRYPT) { /* * Decrypt error. If the error occurred * because there was no hardware key, then * let the frame through so the upper layers * can process it. This is necessary for 5210 * parts which have no way to setup a ``clear'' * key cache entry. * * XXX do key cache faulting */ if (rs->rs_keyix == HAL_RXKEYIX_INVALID) goto rx_accept; sc->sc_stats.ast_rx_badcrypt++; } /* * Similar to above - if the failure was a keymiss * just punt it up to the upper layers for now. */ if (rs->rs_status & HAL_RXERR_KEYMISS) { sc->sc_stats.ast_rx_keymiss++; goto rx_accept; } if (rs->rs_status & HAL_RXERR_MIC) { sc->sc_stats.ast_rx_badmic++; /* * Do minimal work required to hand off * the 802.11 header for notification. */ /* XXX frag's and qos frames */ len = rs->rs_datalen; if (len >= sizeof (struct ieee80211_frame)) { ath_handle_micerror(ic, mtod(m, struct ieee80211_frame *), sc->sc_splitmic ? rs->rs_keyix-32 : rs->rs_keyix); } } counter_u64_add(ic->ic_ierrors, 1); rx_error: /* * Cleanup any pending partial frame. */ if (re->m_rxpending != NULL) { m_freem(re->m_rxpending); re->m_rxpending = NULL; } /* * When a tap is present pass error frames * that have been requested. By default we * pass decrypt+mic errors but others may be * interesting (e.g. crc). */ if (ieee80211_radiotap_active(ic) && (rs->rs_status & sc->sc_monpass)) { /* NB: bpf needs the mbuf length setup */ len = rs->rs_datalen; m->m_pkthdr.len = m->m_len = len; ath_rx_tap(sc, m, rs, rstamp, nf); #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT ath_rx_tap_vendor(sc, m, rs, rstamp, nf); #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ ieee80211_radiotap_rx_all(ic, m); } /* XXX pass MIC errors up for s/w recalculation */ m_freem(m); m = NULL; goto rx_next; } rx_accept: len = rs->rs_datalen; m->m_len = len; if (rs->rs_more) { /* * Frame spans multiple descriptors; save * it for the next completed descriptor, it * will be used to construct a jumbogram.
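 *
 * E.g. a 3000-byte frame arriving in two 2KB clusters
 * (hypothetical sizes): the first descriptor completes with
 * rs_more set, so its mbuf is parked in re->m_rxpending; when
 * the second descriptor completes, the code below chains the
 * new mbuf onto it, fixes up m_pkthdr.len, and passes the
 * reassembled frame up as a single packet.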
*/ if (re->m_rxpending != NULL) { /* NB: max frame size is currently 2 clusters */ sc->sc_stats.ast_rx_toobig++; m_freem(re->m_rxpending); } m->m_pkthdr.len = len; re->m_rxpending = m; m = NULL; goto rx_next; } else if (re->m_rxpending != NULL) { /* * This is the second part of a jumbogram, * chain it to the first mbuf, adjust the * frame length, and clear the rxpending state. */ re->m_rxpending->m_next = m; re->m_rxpending->m_pkthdr.len += len; m = re->m_rxpending; re->m_rxpending = NULL; } else { /* * Normal single-descriptor receive; setup packet length. */ m->m_pkthdr.len = len; } /* * Validate rs->rs_antenna. * * Some users w/ AR9285 NICs have reported crashes * here because rs_antenna field is bogusly large. * Let's enforce the maximum antenna limit of 8 * (and it shouldn't be hard coded, but that's a * separate problem) and if there's an issue, print * out an error and adjust rs_antenna to something * sensible. * * This code should be removed once the actual * root cause of the issue has been identified. * For example, it may be that the rs_antenna * field is only valid for the last frame of * an aggregate and it just happens that it is * "mostly" right. (This is a general statement - * the majority of the statistics are only valid * for the last frame in an aggregate. */ if (rs->rs_antenna > 7) { device_printf(sc->sc_dev, "%s: rs_antenna > 7 (%d)\n", __func__, rs->rs_antenna); #ifdef ATH_DEBUG ath_printrxbuf(sc, bf, 0, status == HAL_OK); #endif /* ATH_DEBUG */ rs->rs_antenna = 0; /* XXX better than nothing */ } /* * If this is an AR9285/AR9485, then the receive and LNA * configuration is stored in RSSI[2] / EXTRSSI[2]. * We can extract this out to build a much better * receive antenna profile. * * Yes, this just blurts over the above RX antenna field * for now. It's fine, the AR9285 doesn't really use * that. * * Later on we should store away the fine grained LNA * information and keep separate counters just for * that. It'll help when debugging the AR9285/AR9485 * combined diversity code. */ if (sc->sc_rx_lnamixer) { rs->rs_antenna = 0; /* Bits 0:1 - the LNA configuration used */ rs->rs_antenna |= ((rs->rs_rssi_ctl[2] & HAL_RX_LNA_CFG_USED) >> HAL_RX_LNA_CFG_USED_S); /* Bit 2 - the external RX antenna switch */ if (rs->rs_rssi_ctl[2] & HAL_RX_LNA_EXTCFG) rs->rs_antenna |= 0x4; } sc->sc_stats.ast_ant_rx[rs->rs_antenna]++; /* * Populate the rx status block. When there are bpf * listeners we do the additional work to provide * complete status. Otherwise we fill in only the * material required by ieee80211_input. Note that * noise setting is filled in above. */ if (ieee80211_radiotap_active(ic)) { ath_rx_tap(sc, m, rs, rstamp, nf); #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT ath_rx_tap_vendor(sc, m, rs, rstamp, nf); #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ } /* * From this point on we assume the frame is at least * as large as ieee80211_frame_min; verify that. 
*/ if (len < IEEE80211_MIN_LEN) { if (!ieee80211_radiotap_active(ic)) { DPRINTF(sc, ATH_DEBUG_RECV, "%s: short packet %d\n", __func__, len); sc->sc_stats.ast_rx_tooshort++; } else { /* NB: in particular this captures ack's */ ieee80211_radiotap_rx_all(ic, m); } m_freem(m); m = NULL; goto rx_next; } if (IFF_DUMPPKTS(sc, ATH_DEBUG_RECV)) { const HAL_RATE_TABLE *rt = sc->sc_currates; uint8_t rix = rt->rateCodeToIndex[rs->rs_rate]; ieee80211_dump_pkt(ic, mtod(m, caddr_t), len, sc->sc_hwmap[rix].ieeerate, rs->rs_rssi); } m_adj(m, -IEEE80211_CRC_LEN); /* * Locate the node for sender, track state, and then * pass the (referenced) node up to the 802.11 layer * for its use. */ ni = ieee80211_find_rxnode_withkey(ic, mtod(m, const struct ieee80211_frame_min *), rs->rs_keyix == HAL_RXKEYIX_INVALID ? IEEE80211_KEYIX_NONE : rs->rs_keyix); sc->sc_lastrs = rs; #ifdef AH_SUPPORT_AR5416 if (rs->rs_isaggr) sc->sc_stats.ast_rx_agg++; #endif /* AH_SUPPORT_AR5416 */ /* * Populate the per-chain RSSI values where appropriate. */ bzero(&rxs, sizeof(rxs)); rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI | IEEE80211_R_C_CHAIN | IEEE80211_R_C_NF | IEEE80211_R_C_RSSI | IEEE80211_R_TSF64 | IEEE80211_R_TSF_START; /* XXX TODO: validate */ rxs.c_rssi = rs->rs_rssi; rxs.c_nf = nf; rxs.c_chain = 3; /* XXX TODO: check */ rxs.c_rx_tsf = rstamp; for (i = 0; i < 3; i++) { rxs.c_rssi_ctl[i] = rs->rs_rssi_ctl[i]; rxs.c_rssi_ext[i] = rs->rs_rssi_ext[i]; /* * XXX note: we currently don't track * per-chain noisefloor. */ rxs.c_nf_ctl[i] = nf; rxs.c_nf_ext[i] = nf; } if (ni != NULL) { /* * Only punt packets for ampdu reorder processing for * 11n nodes; net80211 enforces that M_AMPDU is only * set for 11n nodes. */ if (ni->ni_flags & IEEE80211_NODE_HT) m->m_flags |= M_AMPDU; /* * Sending station is known, dispatch directly. */ (void) ieee80211_add_rx_params(m, &rxs); type = ieee80211_input_mimo(ni, m); ieee80211_free_node(ni); m = NULL; /* * Arrange to update the last rx timestamp only for * frames from our ap when operating in station mode. * This assumes the rx key is always setup when * associated. */ if (ic->ic_opmode == IEEE80211_M_STA && rs->rs_keyix != HAL_RXKEYIX_INVALID) is_good = 1; } else { (void) ieee80211_add_rx_params(m, &rxs); type = ieee80211_input_mimo_all(ic, m); m = NULL; } /* * At this point we have passed the frame up the stack; thus * the mbuf is no longer ours. */ /* * Track rx rssi and do any rx antenna management. */ ATH_RSSI_LPF(sc->sc_halstats.ns_avgrssi, rs->rs_rssi); if (sc->sc_diversity) { /* * When using fast diversity, change the default rx * antenna if diversity chooses the other antenna 3 * times in a row. */ if (sc->sc_defant != rs->rs_antenna) { if (++sc->sc_rxotherant >= 3) ath_setdefantenna(sc, rs->rs_antenna); } else sc->sc_rxotherant = 0; } /* Handle slow diversity if enabled */ if (sc->sc_dolnadiv) { ath_lna_rx_comb_scan(sc, rs, ticks, hz); } if (sc->sc_softled) { /* * Blink for any data frame. Otherwise do a * heartbeat-style blink when idle. The latter * is mainly for station mode where we depend on * periodic beacon frames to trigger the poll event. */ if (type == IEEE80211_FC0_TYPE_DATA) { const HAL_RATE_TABLE *rt = sc->sc_currates; ath_led_event(sc, rt->rateCodeToIndex[rs->rs_rate]); } else if (ticks - sc->sc_ledevent >= sc->sc_ledidle) ath_led_event(sc, 0); } rx_next: /* * Debugging - complain if we didn't NULL the mbuf pointer * here. 
*/ if (m != NULL) { device_printf(sc->sc_dev, "%s: mbuf %p should've been freed!\n", __func__, m); } return (is_good); } #define ATH_RX_MAX 128 /* * XXX TODO: break out the "get buffers" from "call ath_rx_pkt()" like * the EDMA code does. * * XXX TODO: then, do all of the RX list management stuff inside * ATH_RX_LOCK() so we don't end up potentially racing. The EDMA * code is doing it right. */ static void ath_rx_proc(struct ath_softc *sc, int resched) { #define PA2DESC(_sc, _pa) \ ((struct ath_desc *)((caddr_t)(_sc)->sc_rxdma.dd_desc + \ ((_pa) - (_sc)->sc_rxdma.dd_desc_paddr))) struct ath_buf *bf; struct ath_hal *ah = sc->sc_ah; #ifdef IEEE80211_SUPPORT_SUPERG struct ieee80211com *ic = &sc->sc_ic; #endif struct ath_desc *ds; struct ath_rx_status *rs; struct mbuf *m; int ngood; HAL_STATUS status; int16_t nf; u_int64_t tsf; int npkts = 0; int kickpcu = 0; int ret; /* XXX we must not hold the ATH_LOCK here */ ATH_UNLOCK_ASSERT(sc); ATH_PCU_UNLOCK_ASSERT(sc); ATH_PCU_LOCK(sc); sc->sc_rxproc_cnt++; kickpcu = sc->sc_kickpcu; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: called\n", __func__); ngood = 0; nf = ath_hal_getchannoise(ah, sc->sc_curchan); sc->sc_stats.ast_rx_noise = nf; tsf = ath_hal_gettsf64(ah); do { /* * Don't process too many packets at a time; give the * TX thread time to also run - otherwise the TX * latency can jump by quite a bit, causing throughput * degradation. */ if (!kickpcu && npkts >= ATH_RX_MAX) break; bf = TAILQ_FIRST(&sc->sc_rxbuf); if (sc->sc_rxslink && bf == NULL) { /* NB: shouldn't happen */ device_printf(sc->sc_dev, "%s: no buffer!\n", __func__); break; } else if (bf == NULL) { /* * End of List: * this can happen for non-self-linked RX chains */ sc->sc_stats.ast_rx_hitqueueend++; break; } m = bf->bf_m; if (m == NULL) { /* NB: shouldn't happen */ /* * If mbuf allocation failed previously there * will be no mbuf; try again to re-populate it. */ /* XXX make debug msg */ device_printf(sc->sc_dev, "%s: no mbuf!\n", __func__); TAILQ_REMOVE(&sc->sc_rxbuf, bf, bf_list); goto rx_proc_next; } ds = bf->bf_desc; if (ds->ds_link == bf->bf_daddr) { /* NB: never process the self-linked entry at the end */ sc->sc_stats.ast_rx_hitqueueend++; break; } /* XXX sync descriptor memory */ /* * Must provide the virtual address of the current * descriptor, the physical address, and the virtual * address of the next descriptor in the h/w chain. * This allows the HAL to look ahead to see if the * hardware is done with a descriptor by checking the * done bit in the following descriptor and the address * of the current descriptor the DMA engine is working * on. All this is necessary because of our use of * a self-linked list to avoid rx overruns. */ rs = &bf->bf_status.ds_rxstat; status = ath_hal_rxprocdesc(ah, ds, bf->bf_daddr, PA2DESC(sc, ds->ds_link), rs); #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RECV_DESC) ath_printrxbuf(sc, bf, 0, status == HAL_OK); #endif #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_RXSTATUS)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_RXSTATUS, sc->sc_rx_statuslen, (char *) ds); #endif /* ATH_DEBUG_ALQ */ if (status == HAL_EINPROGRESS) break; TAILQ_REMOVE(&sc->sc_rxbuf, bf, bf_list); npkts++; /* * Process a single frame.
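 *
 * The hand-off pattern below: sync the DMA map for the CPU,
 * unload it, clear bf->bf_m (ath_rx_pkt() consumes the mbuf one
 * way or another), and count the frame in ngood if ath_rx_pkt()
 * reports it as a good frame from our AP (used to refresh
 * sc_lastrx).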
*/ bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); bf->bf_m = NULL; if (ath_rx_pkt(sc, rs, status, tsf, nf, HAL_RX_QUEUE_HP, bf, m)) ngood++; rx_proc_next: /* * If there's a holding buffer, insert that onto * the RX list; the hardware is now definitely not pointing * to it now. */ ret = 0; if (sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf != NULL) { TAILQ_INSERT_TAIL(&sc->sc_rxbuf, sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf, bf_list); ret = ath_rxbuf_init(sc, sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf); } /* * Next, throw our buffer into the holding entry. The hardware * may use the descriptor to read the link pointer before * DMAing the next descriptor in to write out a packet. */ sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf = bf; } while (ret == 0); /* rx signal state monitoring */ ath_hal_rxmonitor(ah, &sc->sc_halstats, sc->sc_curchan); if (ngood) sc->sc_lastrx = tsf; ATH_KTR(sc, ATH_KTR_RXPROC, 2, "ath_rx_proc: npkts=%d, ngood=%d", npkts, ngood); /* Queue DFS tasklet if needed */ if (resched && ath_dfs_tasklet_needed(sc, sc->sc_curchan)) taskqueue_enqueue(sc->sc_tq, &sc->sc_dfstask); /* * Now that all the RX frames were handled that * need to be handled, kick the PCU if there's * been an RXEOL condition. */ if (resched && kickpcu) { ATH_PCU_LOCK(sc); ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_rx_proc: kickpcu"); device_printf(sc->sc_dev, "%s: kickpcu; handled %d packets\n", __func__, npkts); /* * Go through the process of fully tearing down * the RX buffers and reinitialising them. * * There's a hardware bug that causes the RX FIFO * to get confused under certain conditions and * constantly write over the same frame, leading * the RX driver code here to get heavily confused. */ /* * XXX Has RX DMA stopped enough here to just call * ath_startrecv()? * XXX Do we need to use the holding buffer to restart * RX DMA by appending entries to the final * descriptor? Quite likely. */ #if 1 ath_startrecv(sc); #else /* * Disabled for now - it'd be nice to be able to do * this in order to limit the amount of CPU time spent * reinitialising the RX side (and thus minimise RX * drops) however there's a hardware issue that * causes things to get too far out of whack. */ /* * XXX can we hold the PCU lock here? * Are there any net80211 buffer calls involved? */ bf = TAILQ_FIRST(&sc->sc_rxbuf); ath_hal_putrxbuf(ah, bf->bf_daddr, HAL_RX_QUEUE_HP); ath_hal_rxena(ah); /* enable recv descriptors */ ath_mode_init(sc); /* set filters, etc. */ ath_hal_startpcurecv(ah); /* re-enable PCU/DMA engine */ #endif ath_hal_intrset(ah, sc->sc_imask); sc->sc_kickpcu = 0; ATH_PCU_UNLOCK(sc); } #ifdef IEEE80211_SUPPORT_SUPERG if (resched) ieee80211_ff_age_all(ic, 100); #endif /* * Put the hardware to sleep again if we're done with it. */ ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); /* * If we hit the maximum number of frames in this round, * reschedule for another immediate pass. This gives * the TX and TX completion routines time to run, which * will reduce latency. */ if (npkts >= ATH_RX_MAX) sc->sc_rx.recv_sched(sc, resched); ATH_PCU_LOCK(sc); sc->sc_rxproc_cnt--; ATH_PCU_UNLOCK(sc); } #undef PA2DESC #undef ATH_RX_MAX /* * Only run the RX proc if it's not already running. * Since this may get run as part of the reset/flush path, * the task can't clash with an existing, running tasklet. 
*/ static void ath_legacy_rx_tasklet(void *arg, int npending) { struct ath_softc *sc = arg; ATH_KTR(sc, ATH_KTR_RXPROC, 1, "ath_rx_proc: pending=%d", npending); DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: pending %u\n", __func__, npending); ATH_PCU_LOCK(sc); if (sc->sc_inreset_cnt > 0) { device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; skipping\n", __func__); ATH_PCU_UNLOCK(sc); return; } ATH_PCU_UNLOCK(sc); ath_rx_proc(sc, 1); } static void ath_legacy_flushrecv(struct ath_softc *sc) { ath_rx_proc(sc, 0); } static void ath_legacy_flush_rxpending(struct ath_softc *sc) { /* XXX ATH_RX_LOCK_ASSERT(sc); */ if (sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending != NULL) { m_freem(sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending); sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending = NULL; } if (sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending != NULL) { m_freem(sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending); sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending = NULL; } } static int ath_legacy_flush_rxholdbf(struct ath_softc *sc) { struct ath_buf *bf; /* XXX ATH_RX_LOCK_ASSERT(sc); */ /* * If there are RX holding buffers, free them here and return * them to the list. * * XXX should just verify that bf->bf_m is NULL, as it must * be at this point! */ bf = sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf; if (bf != NULL) { if (bf->bf_m != NULL) m_freem(bf->bf_m); bf->bf_m = NULL; TAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list); (void) ath_rxbuf_init(sc, bf); } sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf = NULL; bf = sc->sc_rxedma[HAL_RX_QUEUE_LP].m_holdbf; if (bf != NULL) { if (bf->bf_m != NULL) m_freem(bf->bf_m); bf->bf_m = NULL; TAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list); (void) ath_rxbuf_init(sc, bf); } sc->sc_rxedma[HAL_RX_QUEUE_LP].m_holdbf = NULL; return (0); } /* * Disable the receive h/w in preparation for a reset. */ static void ath_legacy_stoprecv(struct ath_softc *sc, int dodelay) { #define PA2DESC(_sc, _pa) \ ((struct ath_desc *)((caddr_t)(_sc)->sc_rxdma.dd_desc + \ ((_pa) - (_sc)->sc_rxdma.dd_desc_paddr))) struct ath_hal *ah = sc->sc_ah; ATH_RX_LOCK(sc); ath_hal_stoppcurecv(ah); /* disable PCU */ ath_hal_setrxfilter(ah, 0); /* clear recv filter */ ath_hal_stopdmarecv(ah); /* disable DMA engine */ /* * TODO: see if this particular DELAY() is required; it may be * masking some missing FIFO flush or DMA sync. */ #if 0 if (dodelay) #endif DELAY(3000); /* 3ms is long enough for 1 frame */ #ifdef ATH_DEBUG if (sc->sc_debug & (ATH_DEBUG_RESET | ATH_DEBUG_FATAL)) { struct ath_buf *bf; u_int ix; device_printf(sc->sc_dev, "%s: rx queue %p, link %p\n", __func__, (caddr_t)(uintptr_t) ath_hal_getrxbuf(ah, HAL_RX_QUEUE_HP), sc->sc_rxlink); ix = 0; TAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) { struct ath_desc *ds = bf->bf_desc; struct ath_rx_status *rs = &bf->bf_status.ds_rxstat; HAL_STATUS status = ath_hal_rxprocdesc(ah, ds, bf->bf_daddr, PA2DESC(sc, ds->ds_link), rs); if (status == HAL_OK || (sc->sc_debug & ATH_DEBUG_FATAL)) ath_printrxbuf(sc, bf, ix, status == HAL_OK); ix++; } } #endif (void) ath_legacy_flush_rxpending(sc); (void) ath_legacy_flush_rxholdbf(sc); sc->sc_rxlink = NULL; /* just in case */ ATH_RX_UNLOCK(sc); #undef PA2DESC } /* * XXX TODO: something was calling startrecv without calling * stoprecv. Let's figure out what/why. It was showing up * as a mbuf leak (rxpending) and ath_buf leak (holdbf.) */ /* * Enable the receive h/w following a reset. */ static int ath_legacy_startrecv(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; ATH_RX_LOCK(sc); /* * XXX should verify these are already all NULL! 
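* * (Illustrative: each ath_rxbuf_init() call in the loop below hangs its descriptor off *sc_rxlink, yielding a chain ds[0] -> ds[1] -> ... -> ds[n-1]; on parts that use sc_rxslink the tail descriptor is left pointing at itself, which is the "self-linked entry" that ath_rx_proc() deliberately refuses to process.) 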
*/ sc->sc_rxlink = NULL; (void) ath_legacy_flush_rxpending(sc); (void) ath_legacy_flush_rxholdbf(sc); /* * Re-chain all of the buffers in the RX buffer list. */ TAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) { int error = ath_rxbuf_init(sc, bf); if (error != 0) { DPRINTF(sc, ATH_DEBUG_RECV, "%s: ath_rxbuf_init failed %d\n", __func__, error); return error; } } bf = TAILQ_FIRST(&sc->sc_rxbuf); ath_hal_putrxbuf(ah, bf->bf_daddr, HAL_RX_QUEUE_HP); ath_hal_rxena(ah); /* enable recv descriptors */ ath_mode_init(sc); /* set filters, etc. */ ath_hal_startpcurecv(ah); /* re-enable PCU/DMA engine */ ATH_RX_UNLOCK(sc); return 0; } static int ath_legacy_dma_rxsetup(struct ath_softc *sc) { int error; error = ath_descdma_setup(sc, &sc->sc_rxdma, &sc->sc_rxbuf, "rx", sizeof(struct ath_desc), ath_rxbuf, 1); if (error != 0) return (error); return (0); } static int ath_legacy_dma_rxteardown(struct ath_softc *sc) { if (sc->sc_rxdma.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_rxdma, &sc->sc_rxbuf); return (0); } static void ath_legacy_recv_sched(struct ath_softc *sc, int dosched) { taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask); } static void ath_legacy_recv_sched_queue(struct ath_softc *sc, HAL_RX_QUEUE q, int dosched) { taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask); } void ath_recv_setup_legacy(struct ath_softc *sc) { /* Sensible legacy defaults */ /* * XXX this should be changed to properly support the * exact RX descriptor size for each HAL. */ sc->sc_rx_statuslen = sizeof(struct ath_desc); sc->sc_rx.recv_start = ath_legacy_startrecv; sc->sc_rx.recv_stop = ath_legacy_stoprecv; sc->sc_rx.recv_flush = ath_legacy_flushrecv; sc->sc_rx.recv_tasklet = ath_legacy_rx_tasklet; sc->sc_rx.recv_rxbuf_init = ath_legacy_rxbuf_init; sc->sc_rx.recv_setup = ath_legacy_dma_rxsetup; sc->sc_rx.recv_teardown = ath_legacy_dma_rxteardown; sc->sc_rx.recv_sched = ath_legacy_recv_sched; sc->sc_rx.recv_sched_queue = ath_legacy_recv_sched_queue; } Index: projects/clang391-import/sys/dev/ath/if_ath_tx_edma.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_tx_edma.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_tx_edma.c (revision 309263) @@ -1,1042 +1,1061 @@ /*- * Copyright (c) 2012 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif #include #ifdef ATH_DEBUG_ALQ #include #endif /* * some general macros */ #define INCR(_l, _sz) (_l) ++; (_l) &= ((_sz) - 1) #define DECR(_l, _sz) (_l) --; (_l) &= ((_sz) - 1) /* * XXX doesn't belong here, and should be tunable */ #define ATH_TXSTATUS_RING_SIZE 512 MALLOC_DECLARE(M_ATHDEV); static void ath_edma_tx_processq(struct ath_softc *sc, int dosched); #ifdef ATH_DEBUG_ALQ static void ath_tx_alq_edma_push(struct ath_softc *sc, int txq, int nframes, int fifo_depth, int frame_cnt) { struct if_ath_alq_tx_fifo_push aq; aq.txq = htobe32(txq); aq.nframes = htobe32(nframes); aq.fifo_depth = htobe32(fifo_depth); aq.frame_cnt = htobe32(frame_cnt); if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TX_FIFO_PUSH, sizeof(aq), (const char *) &aq); } #endif /* ATH_DEBUG_ALQ */ /* * XXX TODO: push an aggregate as a single FIFO slot, even though * it may not meet the TXOP for say, DBA-gated traffic in TDMA mode. * * The TX completion code handles a TX FIFO slot having multiple frames, * aggregate or otherwise, but it may just make things easier to deal * with. * * XXX TODO: track the number of aggregate subframes and put that in the * push alq message. */ static void ath_tx_edma_push_staging_list(struct ath_softc *sc, struct ath_txq *txq, int limit) { struct ath_buf *bf, *bf_last; struct ath_buf *bfi, *bfp; int i, sqdepth; TAILQ_HEAD(axq_q_f_s, ath_buf) sq; ATH_TXQ_LOCK_ASSERT(txq); /* * Don't bother doing any work if it's full. */ if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) return; if (TAILQ_EMPTY(&txq->axq_q)) return; TAILQ_INIT(&sq); /* * First pass - walk sq, queue up to 'limit' entries, * subtract them from the staging queue. 
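* * (Shape of the routine, for orientation: pass one dequeues up to 'limit' bufs from the txq staging queue into the local sq list; pass two tags the head with ATH_BUF_FIFOPTR and the tail with ATH_BUF_FIFOEND and links each bf_lastds to the next buffer's bf_daddr, so the whole batch is handed to the hardware as a single FIFO slot.) 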
*/ sqdepth = 0; for (i = 0; i < limit; i++) { /* Grab the head entry */ bf = ATH_TXQ_FIRST(txq); if (bf == NULL) break; ATH_TXQ_REMOVE(txq, bf, bf_list); /* Queue it into our staging list */ TAILQ_INSERT_TAIL(&sq, bf, bf_list); /* Ensure the flags are cleared */ bf->bf_flags &= ~(ATH_BUF_FIFOPTR | ATH_BUF_FIFOEND); sqdepth++; } /* * Ok, so now we have a staging list of up to 'limit' * frames from the txq. Now let's wrap that up * into its own list and pass that to the hardware * as one FIFO entry. */ bf = TAILQ_FIRST(&sq); bf_last = TAILQ_LAST(&sq, axq_q_s); /* * Ok, so here's the gymnastics required to make this * all sensible. */ /* * Tag the first/last buffer appropriately. */ bf->bf_flags |= ATH_BUF_FIFOPTR; bf_last->bf_flags |= ATH_BUF_FIFOEND; /* * Walk the descriptor list and link them appropriately. */ bfp = NULL; TAILQ_FOREACH(bfi, &sq, bf_list) { if (bfp != NULL) { ath_hal_settxdesclink(sc->sc_ah, bfp->bf_lastds, bfi->bf_daddr); } bfp = bfi; } i = 0; TAILQ_FOREACH(bfi, &sq, bf_list) { #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_XMIT_DESC) ath_printtxbuf(sc, bfi, txq->axq_qnum, i, 0); #endif /* ATH_DEBUG */ #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC)) ath_tx_alq_post(sc, bfi); #endif /* ATH_DEBUG_ALQ */ i++; } /* * We now need to push this set of frames onto the tail * of the FIFO queue. We don't adjust the aggregate * count, only the queue depth counter(s). * We also need to blank the link pointer now. */ TAILQ_CONCAT(&txq->fifo.axq_q, &sq, bf_list); /* Bump total queue tracking in FIFO queue */ txq->fifo.axq_depth += sqdepth; /* Bump FIFO queue */ txq->axq_fifo_depth++; DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC, "%s: queued %d packets; depth=%d, fifo depth=%d\n", __func__, sqdepth, txq->fifo.axq_depth, txq->axq_fifo_depth); /* Push the first entry into the hardware */ ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr); /* Push start on the DMA if it's not already started */ ath_hal_txstart(sc->sc_ah, txq->axq_qnum); #ifdef ATH_DEBUG_ALQ ath_tx_alq_edma_push(sc, txq->axq_qnum, sqdepth, txq->axq_fifo_depth, txq->fifo.axq_depth); #endif /* ATH_DEBUG_ALQ */ } #define TX_BATCH_SIZE 32 /* * Push some frames into the TX FIFO if we have space. */ static void ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq) { ATH_TXQ_LOCK_ASSERT(txq); DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: called; fifo.depth=%d, fifo depth=%d, depth=%d, aggr_depth=%d\n", __func__, txq->axq_qnum, txq->fifo.axq_depth, txq->axq_fifo_depth, txq->axq_depth, txq->axq_aggr_depth); /* * For now, push up to 32 frames per TX FIFO slot. * If more are in the hardware queue then they'll * get populated when we try to send another frame * or complete a frame - so at most there'll be * 32 non-AMPDU frames per node/TID anyway. * * Note that the hardware staging queue will limit * how many frames in total we will have pushed into * here. * * Later on, we'll want to push fewer frames into * the TX FIFO since we don't want to necessarily * fill tens or hundreds of milliseconds of potential * frames. * * However, we need more frames right now because of * how the MAC implements the frame scheduling policy. * It only ungates a single FIFO entry at a time, * and will run that until CHNTIME expires or the * end of that FIFO entry descriptor list is reached. * So for TDMA we suffer a big performance penalty - * single TX FIFO entries mean the MAC only sends out * one frame per DBA event, which turned out to be * 6ms per TX frame on average. 
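* * (Back-of-envelope: one frame per DBA event at ~6ms per frame is only ~166 frames/sec, i.e. roughly 2Mbit/s of 1500 byte frames - hence the batching below.) 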
* * So, for aggregates it's okay - it'll push two at a * time and this will just do them more efficiently. * For non-aggregates it'll do 4 at a time, up to the * non-aggr limit (non_aggr, which is 32.) They should * be time based rather than a hard count, but I also * do need sleep. */ /* * Do some basic, basic batching to the hardware * queue. * * If we have TX_BATCH_SIZE entries in the staging * queue, then let's try to send them all in one hit. * * Ensure we don't push more than TX_BATCH_SIZE worth * in, otherwise we end up draining 8 slots worth of * 32 frames into the hardware queue and then we don't * attempt to push more frames in until we empty the * FIFO. */ if (txq->axq_depth >= TX_BATCH_SIZE / 2 && txq->fifo.axq_depth <= TX_BATCH_SIZE) { ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE); } /* * Aggregate check: if we have less than two FIFO slots * busy and we have some aggregate frames, queue it. * * Now, ideally we'd just check to see if the scheduler * has given us aggregate frames and push them into the FIFO * as individual slots, as honestly we should just be pushing * a single aggregate in as one FIFO slot. * * Let's do that next once I know this works. */ else if (txq->axq_aggr_depth > 0 && txq->axq_fifo_depth < 2) ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE); /* * * If we have less, and the TXFIFO isn't empty, let's * wait until we've finished sending the FIFO. * * If we have less, and the TXFIFO is empty, then * send them. */ else if (txq->axq_fifo_depth == 0) { ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE); } } /* * Re-initialise the DMA FIFO with the current contents of * said TXQ. * * This should only be called as part of the chip reset path, as it * assumes the FIFO is currently empty. */ static void ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq) { struct ath_buf *bf; int i = 0; int fifostart = 1; int old_fifo_depth; DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n", __func__, txq->axq_qnum); ATH_TXQ_LOCK_ASSERT(txq); /* * Let's log if the tracked FIFO depth doesn't match * what we actually push in. */ old_fifo_depth = txq->axq_fifo_depth; txq->axq_fifo_depth = 0; /* * Walk the FIFO staging list, looking for "head" entries. * Since we may have a partially completed list of frames, * we push the first frame we see into the FIFO and re-mark * it as the head entry. We then skip entries until we see * FIFO end, at which point we get ready to push another * entry into the FIFO. */ TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) { /* * If we're looking for FIFOEND and we haven't found * it, skip. * * If we're looking for FIFOEND and we've found it, * reset for another descriptor. */ #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_XMIT_DESC) ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0); #endif/* ATH_DEBUG */ #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC)) ath_tx_alq_post(sc, bf); #endif /* ATH_DEBUG_ALQ */ if (fifostart == 0) { if (bf->bf_flags & ATH_BUF_FIFOEND) fifostart = 1; continue; } /* Make sure we're not overflowing the FIFO! */ if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) { device_printf(sc->sc_dev, "%s: Q%d: more frames in the queue; FIFO depth=%d?!\n", __func__, txq->axq_qnum, txq->axq_fifo_depth); } #if 0 DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n", __func__, txq->axq_qnum, txq->axq_fifo_depth, bf, !! (bf->bf_flags & ATH_BUF_FIFOPTR), !! 
(bf->bf_flags & ATH_BUF_FIFOEND)); #endif /* * Set this to be the first buffer in the FIFO * list - even if it's also the last buffer in * a FIFO list! */ bf->bf_flags |= ATH_BUF_FIFOPTR; /* Push it into the FIFO and bump the FIFO count */ ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr); txq->axq_fifo_depth++; /* * If this isn't the last entry either, let's * clear fifostart so we continue looking for * said last entry. */ if (! (bf->bf_flags & ATH_BUF_FIFOEND)) fifostart = 0; i++; } /* Only bother starting the queue if there's something in it */ if (i > 0) ath_hal_txstart(sc->sc_ah, txq->axq_qnum); DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n", __func__, txq->axq_qnum, old_fifo_depth, txq->axq_fifo_depth); /* And now, let's check! */ if (txq->axq_fifo_depth != old_fifo_depth) { device_printf(sc->sc_dev, "%s: Q%d: FIFO depth should be %d, is %d\n", __func__, txq->axq_qnum, old_fifo_depth, txq->axq_fifo_depth); } } /* * Hand off this frame to a hardware queue. * * Things are a bit hairy in the EDMA world. The TX FIFO is only * 8 entries deep, so we need to keep track of exactly what we've * pushed into the FIFO and what's just sitting in the TX queue, * waiting to go out. * * So this is split into two halves - frames get appended to the * TXQ; then a scheduler is called to push some frames into the * actual TX FIFO. */ static void ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) { ATH_TXQ_LOCK(txq); KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0, ("%s: busy status 0x%x", __func__, bf->bf_flags)); /* * XXX TODO: write a hard-coded check to ensure that * the queue id in the TX descriptor matches txq->axq_qnum. */ /* Update aggr stats */ if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth++; /* Push and update frame stats */ ATH_TXQ_INSERT_TAIL(txq, bf, bf_list); /* * Finally, call the FIFO schedule routine to schedule some * frames to the FIFO. */ ath_edma_tx_fifo_fill(sc, txq); ATH_TXQ_UNLOCK(txq); } /* * Hand off this frame to a multicast software queue. * * The EDMA TX CABQ will get a list of frames, chained * together using the next pointer. The single head of that * particular queue is pushed to the hardware CABQ. */ static void ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) { ATH_TX_LOCK_ASSERT(sc); KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0, ("%s: busy status 0x%x", __func__, bf->bf_flags)); ATH_TXQ_LOCK(txq); /* * XXX this is mostly duplicated in ath_tx_handoff_mcast(). */ if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) { struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s); struct ieee80211_frame *wh; /* mark previous frame */ wh = mtod(bf_last->bf_m, struct ieee80211_frame *); wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA; /* re-sync buffer to memory */ bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap, BUS_DMASYNC_PREWRITE); /* link descriptor */ ath_hal_settxdesclink(sc->sc_ah, bf_last->bf_lastds, bf->bf_daddr); } #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC)) ath_tx_alq_post(sc, bf); #endif /* ATH_DEBUG_ALQ */ ATH_TXQ_INSERT_TAIL(txq, bf, bf_list); ATH_TXQ_UNLOCK(txq); } /* * Handoff this frame to the hardware. * * For the multicast queue, this will treat it as a software queue * and append it to the list, after updating the MORE_DATA flag * in the previous frame. The cabq processing code will ensure * that the queue contents get transferred over. 
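* * (Illustrative: the MORE_DATA dance matters because a power-save station keeps its receiver enabled for as long as received frames carry IEEE80211_FC1_MORE_DATA in i_fc[1]; only the final CABQ frame of the beacon interval leaves the bit clear, telling the station it may doze again.) 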
* * For the hardware queues, this will queue a frame to the queue * like before, then populate the FIFO from that. Since the * EDMA hardware has 8 FIFO slots per TXQ, this ensures that * frames such as management frames don't get prematurely dropped. * * This does imply that a similar flush-hwq-to-fifoq method will * need to be called from the processq function, before the * per-node software scheduler is called. */ static void ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) { DPRINTF(sc, ATH_DEBUG_XMIT_DESC, "%s: called; bf=%p, txq=%p, qnum=%d\n", __func__, bf, txq, txq->axq_qnum); if (txq->axq_qnum == ATH_TXQ_SWQ) ath_edma_xmit_handoff_mcast(sc, txq, bf); else ath_edma_xmit_handoff_hw(sc, txq, bf); } static int ath_edma_setup_txfifo(struct ath_softc *sc, int qnum) { struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum]; te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH, M_ATHDEV, M_NOWAIT | M_ZERO); if (te->m_fifo == NULL) { device_printf(sc->sc_dev, "%s: malloc failed\n", __func__); return (-ENOMEM); } /* * Set initial "empty" state. */ te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0; return (0); } static int ath_edma_free_txfifo(struct ath_softc *sc, int qnum) { struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum]; /* XXX TODO: actually deref the ath_buf entries? */ free(te->m_fifo, M_ATHDEV); return (0); } static int ath_edma_dma_txsetup(struct ath_softc *sc) { int error; int i; error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma, NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE); if (error != 0) return (error); ath_hal_setuptxstatusring(sc->sc_ah, (void *) sc->sc_txsdma.dd_desc, sc->sc_txsdma.dd_desc_paddr, ATH_TXSTATUS_RING_SIZE); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { ath_edma_setup_txfifo(sc, i); } return (0); } static int ath_edma_dma_txteardown(struct ath_softc *sc) { int i; for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { ath_edma_free_txfifo(sc, i); } ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL); return (0); } /* * Drain all TXQs, potentially after completing the existing completed * frames. */ static void ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type) { int i; DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__); (void) ath_stoptxdma(sc); /* * If reset type is noloss, the TX FIFO needs to be serviced * and those frames need to be handled. * * Otherwise, just toss everything in each TX queue. */ if (reset_type == ATH_RESET_NOLOSS) { ath_edma_tx_processq(sc, 0); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); /* * Free the holding buffer; DMA is now * stopped. */ ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]); /* * Reset the link pointer to NULL; there's * no frames to chain DMA to. */ sc->sc_txq[i].axq_link = NULL; ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } } } else { for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) ath_tx_draintxq(sc, &sc->sc_txq[i]); } } /* XXX dump out the TX completion FIFO contents */ /* XXX dump out the frames */ sc->sc_wd_timer = 0; } /* * TX completion tasklet. 
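* * (The change below brackets the processq call the same way ath_rx_proc() is bracketed - sketched with the macros already used in this driver: ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; ATH_PCU_UNLOCK(sc); then ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); before processing, with the matching decrement and ath_power_restore_power_state() afterwards - so the reset path can wait on sc_txproc_cnt and the chip stays awake for the duration.) 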
*/ static void ath_edma_tx_proc(void *arg, int npending) { struct ath_softc *sc = (struct ath_softc *) arg; + ATH_PCU_LOCK(sc); + sc->sc_txproc_cnt++; + ATH_PCU_UNLOCK(sc); + + ATH_LOCK(sc); + ath_power_set_power_state(sc, HAL_PM_AWAKE); + ATH_UNLOCK(sc); + #if 0 DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n", __func__, npending); #endif ath_edma_tx_processq(sc, 1); + + + ATH_PCU_LOCK(sc); + sc->sc_txproc_cnt--; + ATH_PCU_UNLOCK(sc); + + ATH_LOCK(sc); + ath_power_restore_power_state(sc); + ATH_UNLOCK(sc); + + ath_tx_kick(sc); } /* * Process the TX status queue. */ static void ath_edma_tx_processq(struct ath_softc *sc, int dosched) { struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; struct ath_tx_status ts; struct ath_txq *txq; struct ath_buf *bf; struct ieee80211_node *ni; int nacked = 0; int idx; int i; #ifdef ATH_DEBUG /* XXX */ uint32_t txstatus[32]; #endif for (idx = 0; ; idx++) { bzero(&ts, sizeof(ts)); ATH_TXSTATUS_LOCK(sc); #ifdef ATH_DEBUG ath_hal_gettxrawtxdesc(ah, txstatus); #endif status = ath_hal_txprocdesc(ah, NULL, (void *) &ts); ATH_TXSTATUS_UNLOCK(sc); if (status == HAL_EINPROGRESS) break; #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_TX_PROC) if (ts.ts_queue_id != sc->sc_bhalq) ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id, idx, (status == HAL_OK)); #endif /* * If there is an error with this descriptor, continue * processing. * * XXX TBD: log some statistics? */ if (status == HAL_EIO) { device_printf(sc->sc_dev, "%s: invalid TX status?\n", __func__); break; } #if defined(ATH_DEBUG_ALQ) && defined(ATH_DEBUG) if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) { if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS, sc->sc_tx_statuslen, (char *) txstatus); } #endif /* ATH_DEBUG_ALQ */ /* * At this point we have a valid status descriptor. * The QID and descriptor ID (which currently isn't set) * is part of the status. * * We then assume that the descriptor in question is the * -head- of the given QID. Eventually we should verify * this by using the descriptor ID. */ /* * The beacon queue is not currently a "real" queue. * Frames aren't pushed onto it and the lock isn't setup. * So skip it for now; the beacon handling code will * free and alloc more beacon buffers as appropriate. */ if (ts.ts_queue_id == sc->sc_bhalq) continue; txq = &sc->sc_txq[ts.ts_queue_id]; ATH_TXQ_LOCK(txq); bf = ATH_TXQ_FIRST(&txq->fifo); /* * Work around the situation where I'm seeing notifications * for Q1 when no frames are available. That needs to be * debugged but not by crashing _here_. */ if (bf == NULL) { device_printf(sc->sc_dev, "%s: Q%d: empty?\n", __func__, ts.ts_queue_id); ATH_TXQ_UNLOCK(txq); continue; } DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n", __func__, ts.ts_queue_id, bf, !! (bf->bf_flags & ATH_BUF_FIFOPTR), !! 
(bf->bf_flags & ATH_BUF_FIFOEND)); /* XXX TODO: actually output debugging info about this */ #if 0 /* XXX assert the buffer/descriptor matches the status descid */ if (ts.ts_desc_id != bf->bf_descid) { device_printf(sc->sc_dev, "%s: mismatched descid (qid=%d, tsdescid=%d, " "bfdescid=%d\n", __func__, ts.ts_queue_id, ts.ts_desc_id, bf->bf_descid); } #endif /* This removes the buffer and decrements the queue depth */ ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list); if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth--; /* * If this was the end of a FIFO set, decrement FIFO depth */ if (bf->bf_flags & ATH_BUF_FIFOEND) txq->axq_fifo_depth--; /* * If this isn't the final buffer in a FIFO set, mark * the buffer as busy so it goes onto the holding queue. */ if (! (bf->bf_flags & ATH_BUF_FIFOEND)) bf->bf_flags |= ATH_BUF_BUSY; DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n", __func__, txq->axq_qnum, txq->axq_fifo_depth, txq->fifo.axq_depth); /* XXX assert FIFO depth >= 0 */ ATH_TXQ_UNLOCK(txq); /* * Outside of the TX lock - if the buffer is the end * buffer in this FIFO, we don't need a holding * buffer any longer. */ if (bf->bf_flags & ATH_BUF_FIFOEND) { ATH_TXQ_LOCK(txq); ath_txq_freeholdingbuf(sc, txq); ATH_TXQ_UNLOCK(txq); } /* * First we need to make sure ts_rate is valid. * * Pre-EDMA chips pass the whole TX descriptor to * the proctxdesc function which will then fill out * ts_rate based on the ts_finaltsi (final TX index) * in the TX descriptor. However the TX completion * FIFO doesn't have this information. So here we * do a separate HAL call to populate that information. * * The same problem exists with ts_longretry. * The FreeBSD HAL corrects ts_longretry in the HAL layer; * the AR9380 HAL currently doesn't. So until the HAL * is imported and this can be added, we correct for it * here. */ /* XXX TODO */ /* XXX faked for now. Ew. */ if (ts.ts_finaltsi < 4) { ts.ts_rate = bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode; switch (ts.ts_finaltsi) { case 3: ts.ts_longretry += bf->bf_state.bfs_rc[2].tries; case 2: ts.ts_longretry += bf->bf_state.bfs_rc[1].tries; case 1: ts.ts_longretry += bf->bf_state.bfs_rc[0].tries; } } else { device_printf(sc->sc_dev, "%s: finaltsi=%d\n", __func__, ts.ts_finaltsi); ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode; } /* * XXX This is terrible. * * Right now, some code uses the TX status that is * passed in here, but the completion handlers in the * software TX path also use bf_status.ds_txstat. * Ew. That should all go away. * * XXX It's also possible the rate control completion * routine is called twice. */ memcpy(&bf->bf_status, &ts, sizeof(ts)); ni = bf->bf_node; /* Update RSSI */ /* XXX duplicate from ath_tx_processq */ if (ni != NULL && ts.ts_status == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) { nacked++; sc->sc_stats.ast_tx_rssi = ts.ts_rssi; ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi, ts.ts_rssi); } /* Handle frame completion and rate control update */ ath_tx_process_buf_completion(sc, txq, &ts, bf); /* NB: bf is invalid at this point */ } sc->sc_wd_timer = 0; /* * XXX It's inefficient to do this if the FIFO queue is full, * but there's no easy way right now to only populate * the txq task for _one_ TXQ. This should be fixed. 
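* * (For scale, assuming HAL_NUM_TX_QUEUES is 10 as on current parts: the loop below takes and drops every TXQ lock on each completion pass, i.e. ~10 lock round-trips even when only a single queue saw traffic.) 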
*/ if (dosched) { /* Attempt to schedule more hardware frames to the TX FIFO */ for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); ath_edma_tx_fifo_fill(sc, &sc->sc_txq[i]); ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } } /* Kick software scheduler */ ath_tx_swq_kick(sc); } } static void ath_edma_attach_comp_func(struct ath_softc *sc) { TASK_INIT(&sc->sc_txtask, 0, ath_edma_tx_proc, sc); } void ath_xmit_setup_edma(struct ath_softc *sc) { /* Fetch EDMA field and buffer sizes */ (void) ath_hal_gettxdesclen(sc->sc_ah, &sc->sc_tx_desclen); (void) ath_hal_gettxstatuslen(sc->sc_ah, &sc->sc_tx_statuslen); (void) ath_hal_getntxmaps(sc->sc_ah, &sc->sc_tx_nmaps); if (bootverbose) { device_printf(sc->sc_dev, "TX descriptor length: %d\n", sc->sc_tx_desclen); device_printf(sc->sc_dev, "TX status length: %d\n", sc->sc_tx_statuslen); device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n", sc->sc_tx_nmaps); } sc->sc_tx.xmit_setup = ath_edma_dma_txsetup; sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown; sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func; sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart; sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff; sc->sc_tx.xmit_drain = ath_edma_tx_drain; } Index: projects/clang391-import/sys/dev/ath/if_athioctl.h =================================================================== --- projects/clang391-import/sys/dev/ath/if_athioctl.h (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_athioctl.h (revision 309263) @@ -1,450 +1,455 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ /* * Ioctl-related definitions for the Atheros Wireless LAN controller driver. 
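* * (Illustrative userland usage, not defined in this header: the stats ioctl is issued against a socket with the interface name in an ifreq, e.g. struct ifreq ifr; struct ath_stats stats; strlcpy(ifr.ifr_name, "ath0", sizeof(ifr.ifr_name)); ifr.ifr_data = (caddr_t) &stats; ioctl(s, SIOCGATHSTATS, &ifr); - which is roughly what athstats(8) does.) 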
*/ #ifndef _DEV_ATH_ATHIOCTL_H #define _DEV_ATH_ATHIOCTL_H struct ath_tx_aggr_stats { u_int32_t aggr_pkts[64]; u_int32_t aggr_single_pkt; u_int32_t aggr_nonbaw_pkt; u_int32_t aggr_aggr_pkt; u_int32_t aggr_baw_closed_single_pkt; u_int32_t aggr_low_hwq_single_pkt; u_int32_t aggr_sched_nopkt; u_int32_t aggr_rts_aggr_limited; }; struct ath_intr_stats { u_int32_t sync_intr[32]; }; struct ath_stats { u_int32_t ast_watchdog; /* device reset by watchdog */ u_int32_t ast_hardware; /* fatal hardware error interrupts */ u_int32_t ast_bmiss; /* beacon miss interrupts */ u_int32_t ast_bmiss_phantom;/* beacon miss interrupts */ u_int32_t ast_bstuck; /* beacon stuck interrupts */ u_int32_t ast_rxorn; /* rx overrun interrupts */ u_int32_t ast_rxeol; /* rx eol interrupts */ u_int32_t ast_txurn; /* tx underrun interrupts */ u_int32_t ast_mib; /* mib interrupts */ u_int32_t ast_intrcoal; /* interrupts coalesced */ u_int32_t ast_tx_packets; /* packet sent on the interface */ u_int32_t ast_tx_mgmt; /* management frames transmitted */ u_int32_t ast_tx_discard; /* frames discarded prior to assoc */ u_int32_t ast_tx_qstop; /* output stopped 'cuz no buffer */ u_int32_t ast_tx_encap; /* tx encapsulation failed */ u_int32_t ast_tx_nonode; /* tx failed 'cuz no node */ u_int32_t ast_tx_nombuf; /* tx failed 'cuz no mbuf */ u_int32_t ast_tx_nomcl; /* tx failed 'cuz no cluster */ u_int32_t ast_tx_linear; /* tx linearized to cluster */ u_int32_t ast_tx_nodata; /* tx discarded empty frame */ u_int32_t ast_tx_busdma; /* tx failed for dma resrcs */ u_int32_t ast_tx_xretries;/* tx failed 'cuz too many retries */ u_int32_t ast_tx_fifoerr; /* tx failed 'cuz FIFO underrun */ u_int32_t ast_tx_filtered;/* tx failed 'cuz xmit filtered */ u_int32_t ast_tx_shortretry;/* tx on-chip retries (short) */ u_int32_t ast_tx_longretry;/* tx on-chip retries (long) */ u_int32_t ast_tx_badrate; /* tx failed 'cuz bogus xmit rate */ u_int32_t ast_tx_noack; /* tx frames with no ack marked */ u_int32_t ast_tx_rts; /* tx frames with rts enabled */ u_int32_t ast_tx_cts; /* tx frames with cts enabled */ u_int32_t ast_tx_shortpre;/* tx frames with short preamble */ u_int32_t ast_tx_altrate; /* tx frames with alternate rate */ u_int32_t ast_tx_protect; /* tx frames with protection */ u_int32_t ast_tx_ctsburst;/* tx frames with cts and bursting */ u_int32_t ast_tx_ctsext; /* tx frames with cts extension */ u_int32_t ast_rx_nombuf; /* rx setup failed 'cuz no mbuf */ u_int32_t ast_rx_busdma; /* rx setup failed for dma resrcs */ u_int32_t ast_rx_orn; /* rx failed 'cuz of desc overrun */ u_int32_t ast_rx_crcerr; /* rx failed 'cuz of bad CRC */ u_int32_t ast_rx_fifoerr; /* rx failed 'cuz of FIFO overrun */ u_int32_t ast_rx_badcrypt;/* rx failed 'cuz decryption */ u_int32_t ast_rx_badmic; /* rx failed 'cuz MIC failure */ u_int32_t ast_rx_phyerr; /* rx failed 'cuz of PHY err */ u_int32_t ast_rx_phy[64]; /* rx PHY error per-code counts */ u_int32_t ast_rx_tooshort;/* rx discarded 'cuz frame too short */ u_int32_t ast_rx_toobig; /* rx discarded 'cuz frame too large */ u_int32_t ast_rx_packets; /* packet recv on the interface */ u_int32_t ast_rx_mgt; /* management frames received */ u_int32_t ast_rx_ctl; /* rx discarded 'cuz ctl frame */ int8_t ast_tx_rssi; /* tx rssi of last ack */ int8_t ast_rx_rssi; /* rx rssi from histogram */ u_int8_t ast_tx_rate; /* IEEE rate of last unicast tx */ u_int32_t ast_be_xmit; /* beacons transmitted */ u_int32_t ast_be_nombuf; /* beacon setup failed 'cuz no mbuf */ u_int32_t ast_per_cal; /* periodic calibration calls */ u_int32_t 
ast_per_calfail;/* periodic calibration failed */ u_int32_t ast_per_rfgain; /* periodic calibration rfgain reset */ u_int32_t ast_rate_calls; /* rate control checks */ u_int32_t ast_rate_raise; /* rate control raised xmit rate */ u_int32_t ast_rate_drop; /* rate control dropped xmit rate */ u_int32_t ast_ant_defswitch;/* rx/default antenna switches */ u_int32_t ast_ant_txswitch;/* tx antenna switches */ u_int32_t ast_ant_rx[8]; /* rx frames with antenna */ u_int32_t ast_ant_tx[8]; /* tx frames with antenna */ u_int32_t ast_cabq_xmit; /* cabq frames transmitted */ u_int32_t ast_cabq_busy; /* cabq found busy */ u_int32_t ast_tx_raw; /* tx frames through raw api */ u_int32_t ast_ff_txok; /* fast frames tx'd successfully */ u_int32_t ast_ff_txerr; /* fast frames tx'd w/ error */ u_int32_t ast_ff_rx; /* fast frames rx'd */ u_int32_t ast_ff_flush; /* fast frames flushed from staging q */ u_int32_t ast_tx_qfull; /* tx dropped 'cuz of queue limit */ int8_t ast_rx_noise; /* rx noise floor */ u_int32_t ast_tx_nobuf; /* tx dropped 'cuz no ath buffer */ u_int32_t ast_tdma_update;/* TDMA slot timing updates */ u_int32_t ast_tdma_timers;/* TDMA slot update set beacon timers */ u_int32_t ast_tdma_tsf; /* TDMA slot update set TSF */ u_int16_t ast_tdma_tsfadjp;/* TDMA slot adjust+ (usec, smoothed)*/ u_int16_t ast_tdma_tsfadjm;/* TDMA slot adjust- (usec, smoothed)*/ u_int32_t ast_tdma_ack; /* TDMA tx failed 'cuz ACK required */ u_int32_t ast_tx_raw_fail;/* raw tx failed 'cuz h/w down */ u_int32_t ast_tx_nofrag; /* tx dropped 'cuz no ath frag buffer */ u_int32_t ast_be_missed; /* missed beacons */ u_int32_t ast_ani_cal; /* ANI calibrations performed */ u_int32_t ast_rx_agg; /* number of aggregate frames RX'ed */ u_int32_t ast_rx_halfgi; /* RX half-GI */ u_int32_t ast_rx_2040; /* RX 40mhz frame */ u_int32_t ast_rx_pre_crc_err; /* RX pre-delimiter CRC error */ u_int32_t ast_rx_post_crc_err; /* RX post-delimiter CRC error */ u_int32_t ast_rx_decrypt_busy_err; /* RX decrypt engine busy error */ u_int32_t ast_rx_hi_rx_chain; u_int32_t ast_tx_htprotect; /* HT tx frames with protection */ u_int32_t ast_rx_hitqueueend; /* RX hit descr queue end */ u_int32_t ast_tx_timeout; /* Global TX timeout */ u_int32_t ast_tx_cst; /* Carrier sense timeout */ u_int32_t ast_tx_xtxop; /* tx exceeded TXOP */ u_int32_t ast_tx_timerexpired; /* tx exceeded TX_TIMER */ u_int32_t ast_tx_desccfgerr; /* tx desc cfg error */ u_int32_t ast_tx_swretries; /* software TX retries */ u_int32_t ast_tx_swretrymax; /* software TX retry max limit reach */ u_int32_t ast_tx_data_underrun; u_int32_t ast_tx_delim_underrun; u_int32_t ast_tx_aggr_failall; /* aggregate TX failed in its entirety */ u_int32_t ast_tx_getnobuf; u_int32_t ast_tx_getbusybuf; u_int32_t ast_tx_intr; u_int32_t ast_rx_intr; u_int32_t ast_tx_aggr_ok; /* aggregate TX ok */ u_int32_t ast_tx_aggr_fail; /* aggregate TX failed */ u_int32_t ast_tx_mcastq_overflow; /* multicast queue overflow */ u_int32_t ast_rx_keymiss; u_int32_t ast_tx_swfiltered; u_int32_t ast_tx_node_psq_overflow; u_int32_t ast_rx_stbc; /* RX STBC frame */ u_int32_t ast_tx_nodeq_overflow; /* node sw queue overflow */ u_int32_t ast_tx_ldpc; /* TX LDPC frame */ u_int32_t ast_tx_stbc; /* TX STBC frame */ u_int32_t ast_pad[10]; }; #define SIOCGATHSTATS _IOWR('i', 137, struct ifreq) #define SIOCZATHSTATS _IOWR('i', 139, struct ifreq) #define SIOCGATHAGSTATS _IOWR('i', 141, struct ifreq) struct ath_diag { char ad_name[IFNAMSIZ]; /* if name, e.g. 
"ath0" */ u_int16_t ad_id; #define ATH_DIAG_DYN 0x8000 /* allocate buffer in caller */ #define ATH_DIAG_IN 0x4000 /* copy in parameters */ #define ATH_DIAG_OUT 0x0000 /* copy out results (always) */ #define ATH_DIAG_ID 0x0fff u_int16_t ad_in_size; /* pack to fit, yech */ caddr_t ad_in_data; caddr_t ad_out_data; u_int ad_out_size; }; #define SIOCGATHDIAG _IOWR('i', 138, struct ath_diag) #define SIOCGATHPHYERR _IOWR('i', 140, struct ath_diag) /* * The rate control ioctl has to support multiple potential rate * control classes. For now, instead of trying to support an * abstraction for this in the API, let's just use a TLV * representation for the payload and let userspace sort it out. */ struct ath_rateioctl_tlv { uint16_t tlv_id; uint16_t tlv_len; /* length excluding TLV header */ }; /* * This is purely the six byte MAC address. */ #define ATH_RATE_TLV_MACADDR 0xaab0 /* * The rate control modules may decide to push a mapping table * of rix -> net80211 ratecode as part of the update. */ #define ATH_RATE_TLV_RATETABLE_NENTRIES 64 struct ath_rateioctl_rt { uint16_t nentries; uint16_t pad[1]; uint8_t ratecode[ATH_RATE_TLV_RATETABLE_NENTRIES]; }; #define ATH_RATE_TLV_RATETABLE 0xaab1 /* * This is the sample node statistics structure. * More in ath_rate/sample/sample.h. */ #define ATH_RATE_TLV_SAMPLENODE 0xaab2 struct ath_rateioctl { char if_name[IFNAMSIZ]; /* if name */ union { uint8_t macaddr[IEEE80211_ADDR_LEN]; uint64_t pad; } is_u; uint32_t len; caddr_t buf; }; #define SIOCGATHNODERATESTATS _IOWR('i', 149, struct ath_rateioctl) #define SIOCGATHRATESTATS _IOWR('i', 150, struct ath_rateioctl) /* * Radio capture format. */ #define ATH_RX_RADIOTAP_PRESENT_BASE ( \ (1 << IEEE80211_RADIOTAP_TSFT) | \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_ANTENNA) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTNOISE) | \ (1 << IEEE80211_RADIOTAP_XCHANNEL) | \ 0) #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT #define ATH_RX_RADIOTAP_PRESENT \ (ATH_RX_RADIOTAP_PRESENT_BASE | \ (1 << IEEE80211_RADIOTAP_VENDOREXT) | \ (1 << IEEE80211_RADIOTAP_EXT) | \ 0) #else #define ATH_RX_RADIOTAP_PRESENT ATH_RX_RADIOTAP_PRESENT_BASE #endif /* ATH_ENABLE_RADIOTAP_PRESENT */ #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * This is higher than the vendor bitmap used inside * the Atheros reference codebase. */ /* Bit 8 */ #define ATH_RADIOTAP_VENDOR_HEADER 8 /* * Using four chains makes all the fields in the * per-chain info header be 4-byte aligned. */ #define ATH_RADIOTAP_MAX_CHAINS 4 /* * AR9380 and later chips are 3x3, which requires * 5 EVM DWORDs in HT40 mode. */ #define ATH_RADIOTAP_MAX_EVM 5 /* * The vendor radiotap header data needs to be: * * + Aligned to a 4 byte address * + .. so all internal fields are 4 bytes aligned; * + .. and no 64 bit fields are allowed. * * So padding is required to ensure this is the case. * * Note that because of the lack of alignment with the * vendor header (6 bytes), the first field must be * two bytes so it can be accessed by alignment-strict * platform (eg MIPS.) 
*/ struct ath_radiotap_vendor_hdr { /* 30 bytes */ uint8_t vh_version; /* 1 */ uint8_t vh_rx_chainmask; /* 1 */ /* At this point it should be 4 byte aligned */ uint32_t evm[ATH_RADIOTAP_MAX_EVM]; /* 5 * 4 = 20 */ uint8_t rssi_ctl[ATH_RADIOTAP_MAX_CHAINS]; /* 4 * 4 = 16 */ uint8_t rssi_ext[ATH_RADIOTAP_MAX_CHAINS]; /* 4 * 4 = 16 */ uint8_t vh_phyerr_code; /* Phy error code, or 0xff */ uint8_t vh_rs_status; /* RX status */ uint8_t vh_rssi; /* Raw RSSI */ uint8_t vh_flags; /* General flags */ #define ATH_VENDOR_PKT_RX 0x01 #define ATH_VENDOR_PKT_TX 0x02 #define ATH_VENDOR_PKT_RXPHYERR 0x04 #define ATH_VENDOR_PKT_ISAGGR 0x08 #define ATH_VENDOR_PKT_MOREAGGR 0x10 uint8_t vh_rx_hwrate; /* hardware RX ratecode */ uint8_t vh_rs_flags; /* RX HAL flags */ uint8_t vh_pad[2]; /* pad to DWORD boundary */ } __packed; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ struct ath_rx_radiotap_header { struct ieee80211_radiotap_header wr_ihdr; #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* Vendor extension header bitmap */ uint32_t wr_ext_bitmap; /* 4 */ /* * This padding is needed because: * + the radiotap header is 8 bytes; * + the extension bitmap is 4 bytes; * + the tsf is 8 bytes, so it must start on an 8 byte * boundary. */ uint32_t wr_pad1; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ /* Normal radiotap fields */ u_int64_t wr_tsf; u_int8_t wr_flags; u_int8_t wr_rate; int8_t wr_antsignal; int8_t wr_antnoise; u_int8_t wr_antenna; u_int8_t wr_pad[3]; u_int32_t wr_chan_flags; u_int16_t wr_chan_freq; u_int8_t wr_chan_ieee; int8_t wr_chan_maxpow; #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * Vendor header section, as required by the * presence of the vendor extension bit and bitmap * entry. * * XXX This must be aligned to a 4 byte address? * XXX or 8 byte address? */ struct ieee80211_radiotap_vendor_header wr_vh; /* 6 bytes */ /* * Because of the lack of alignment enforced by the above * header, this vendor section won't be aligned in any * useful way. So, this will include a two-byte version * value which will force the structure to be 4-byte aligned. 
*/ struct ath_radiotap_vendor_hdr wr_v; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ } __packed; #define ATH_TX_RADIOTAP_PRESENT ( \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_DBM_TX_POWER) | \ (1 << IEEE80211_RADIOTAP_ANTENNA) | \ (1 << IEEE80211_RADIOTAP_XCHANNEL) | \ 0) struct ath_tx_radiotap_header { struct ieee80211_radiotap_header wt_ihdr; u_int8_t wt_flags; u_int8_t wt_rate; u_int8_t wt_txpower; u_int8_t wt_antenna; u_int32_t wt_chan_flags; u_int16_t wt_chan_freq; u_int8_t wt_chan_ieee; int8_t wt_chan_maxpow; } __packed; /* * DFS ioctl commands */ #define DFS_SET_THRESH 2 #define DFS_GET_THRESH 3 #define DFS_RADARDETECTS 6 /* * DFS ioctl parameter types */ #define DFS_PARAM_FIRPWR 1 #define DFS_PARAM_RRSSI 2 #define DFS_PARAM_HEIGHT 3 #define DFS_PARAM_PRSSI 4 #define DFS_PARAM_INBAND 5 #define DFS_PARAM_NOL 6 /* XXX not used in FreeBSD */ #define DFS_PARAM_RELSTEP_EN 7 #define DFS_PARAM_RELSTEP 8 #define DFS_PARAM_RELPWR_EN 9 #define DFS_PARAM_RELPWR 10 #define DFS_PARAM_MAXLEN 11 #define DFS_PARAM_USEFIR128 12 #define DFS_PARAM_BLOCKRADAR 13 #define DFS_PARAM_MAXRSSI_EN 14 /* FreeBSD-specific start at 32 */ #define DFS_PARAM_ENABLE 32 #define DFS_PARAM_EN_EXTCH 33 /* * Spectral ioctl parameter types */ #define SPECTRAL_PARAM_FFT_PERIOD 1 #define SPECTRAL_PARAM_SS_PERIOD 2 #define SPECTRAL_PARAM_SS_COUNT 3 #define SPECTRAL_PARAM_SS_SHORT_RPT 4 #define SPECTRAL_PARAM_ENABLED 5 #define SPECTRAL_PARAM_ACTIVE 6 /* * Spectral control parameters */ #define SIOCGATHSPECTRAL _IOWR('i', 151, struct ath_diag) #define SPECTRAL_CONTROL_ENABLE 2 #define SPECTRAL_CONTROL_DISABLE 3 #define SPECTRAL_CONTROL_START 4 #define SPECTRAL_CONTROL_STOP 5 #define SPECTRAL_CONTROL_GET_PARAMS 6 #define SPECTRAL_CONTROL_SET_PARAMS 7 #define SPECTRAL_CONTROL_ENABLE_AT_RESET 8 #define SPECTRAL_CONTROL_DISABLE_AT_RESET 9 +/* + * Bluetooth coexistence control parameters + */ +#define SIOCGATHBTCOEX _IOWR('i', 152, struct ath_diag) + #endif /* _DEV_ATH_ATHIOCTL_H */ Index: projects/clang391-import/sys/dev/hyperv/include/vmbus.h =================================================================== --- projects/clang391-import/sys/dev/hyperv/include/vmbus.h (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/include/vmbus.h (revision 309263) @@ -1,220 +1,223 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMBUS_H_ #define _VMBUS_H_ #include #include /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define VMBUS_VERSION_MAJOR(ver) (((uint32_t)(ver)) >> 16) #define VMBUS_VERSION_MINOR(ver) (((uint32_t)(ver)) & 0xffff) /* * GPA stuff. */ struct vmbus_gpa_range { uint32_t gpa_len; uint32_t gpa_ofs; uint64_t gpa_page[0]; } __packed; /* This is actually vmbus_gpa_range.gpa_page[1] */ struct vmbus_gpa { uint32_t gpa_len; uint32_t gpa_ofs; uint64_t gpa_page; } __packed; #define VMBUS_CHANPKT_SIZE_SHIFT 3 #define VMBUS_CHANPKT_GETLEN(pktlen) \ (((int)(pktlen)) << VMBUS_CHANPKT_SIZE_SHIFT) struct vmbus_chanpkt_hdr { uint16_t cph_type; /* VMBUS_CHANPKT_TYPE_ */ uint16_t cph_hlen; /* header len, in 8 bytes */ uint16_t cph_tlen; /* total len, in 8 bytes */ uint16_t cph_flags; /* VMBUS_CHANPKT_FLAG_ */ uint64_t cph_xactid; } __packed; #define VMBUS_CHANPKT_TYPE_INBAND 0x0006 #define VMBUS_CHANPKT_TYPE_RXBUF 0x0007 #define VMBUS_CHANPKT_TYPE_GPA 0x0009 #define VMBUS_CHANPKT_TYPE_COMP 0x000b #define VMBUS_CHANPKT_FLAG_NONE 0 #define VMBUS_CHANPKT_FLAG_RC 0x0001 /* report completion */ #define VMBUS_CHANPKT_CONST_DATA(pkt) \ (const void *)((const uint8_t *)(pkt) + \ VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) /* Include padding */ #define VMBUS_CHANPKT_DATALEN(pkt) \ (VMBUS_CHANPKT_GETLEN((pkt)->cph_tlen) -\ VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) struct vmbus_rxbuf_desc { uint32_t rb_len; uint32_t rb_ofs; } __packed; struct vmbus_chanpkt_rxbuf { struct vmbus_chanpkt_hdr cp_hdr; uint16_t cp_rxbuf_id; uint16_t cp_rsvd; uint32_t cp_rxbuf_cnt; struct vmbus_rxbuf_desc cp_rxbuf[]; } __packed; struct vmbus_chan_br { void *cbr; bus_addr_t cbr_paddr; int cbr_txsz; int cbr_rxsz; }; struct vmbus_channel; +struct vmbus_xact; struct vmbus_xact_ctx; struct hyperv_guid; struct task; struct taskqueue; typedef void (*vmbus_chan_callback_t)(struct vmbus_channel *, void *); static __inline struct vmbus_channel * vmbus_get_channel(device_t dev) { return device_get_ivars(dev); } /* * vmbus_chan_open_br() * * Return values: * 0 Succeeded. * EISCONN Failed, and the memory passed through 'br' is still * connected. Callers must _not_ free the memory * passed through 'br', if this error happens. * other values Failed. The memory passed through 'br' is no longer * connected. Callers are free to do anything with the * memory passed through 'br'. * * * * vmbus_chan_close_direct() * * NOTE: * Callers of this function _must_ make sure to close all sub-channels before * closing the primary channel. * * Return values: * 0 Succeeded. * EISCONN Failed, and the memory associated with the bufring * is still connected. 
Callers must _not_ free the * memory associated with the bufring, if this error * happens. * other values Failed. The memory associated with the bufring is * no longer connected. Callers are free to do anything * with the memory associated with the bufring. */ int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg); int vmbus_chan_open_br(struct vmbus_channel *chan, const struct vmbus_chan_br *cbr, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg); void vmbus_chan_close(struct vmbus_channel *chan); int vmbus_chan_close_direct(struct vmbus_channel *chan); void vmbus_chan_intr_drain(struct vmbus_channel *chan); void vmbus_chan_run_task(struct vmbus_channel *chan, struct task *task); void vmbus_chan_set_orphan(struct vmbus_channel *chan, struct vmbus_xact_ctx *); void vmbus_chan_unset_orphan(struct vmbus_channel *chan); +const void *vmbus_chan_xact_wait(const struct vmbus_channel *chan, + struct vmbus_xact *xact, size_t *resp_len, bool can_sleep); int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl); int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl); void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu); void vmbus_chan_cpu_rr(struct vmbus_channel *chan); void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on); struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt); void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt); void vmbus_subchan_drain(struct vmbus_channel *pri_chan); int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen, uint64_t *xactid); int vmbus_chan_recv_pkt(struct vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt, int *pktlen); int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid); int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid); int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid); uint32_t vmbus_chan_id(const struct vmbus_channel *chan); uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan); bool vmbus_chan_is_primary(const struct vmbus_channel *chan); bool vmbus_chan_is_revoked(const struct vmbus_channel *chan); const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan); int vmbus_chan_prplist_nelem(int br_size, int prpcnt_max, int dlen_max); bool vmbus_chan_rx_empty(const struct vmbus_channel *chan); bool vmbus_chan_tx_empty(const struct vmbus_channel *chan); struct taskqueue * vmbus_chan_mgmt_tq(const struct vmbus_channel *chan); #endif /* !_VMBUS_H_ */ Index: projects/clang391-import/sys/dev/hyperv/include/vmbus_xact.h =================================================================== --- projects/clang391-import/sys/dev/hyperv/include/vmbus_xact.h (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/include/vmbus_xact.h (revision 309263) @@ -1,63 +1,65 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMBUS_XACT_H_ #define _VMBUS_XACT_H_ #include #include struct vmbus_xact; struct vmbus_xact_ctx; struct vmbus_xact_ctx *vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size, size_t priv_size); void vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx); bool vmbus_xact_ctx_orphan(struct vmbus_xact_ctx *ctx); struct vmbus_xact *vmbus_xact_get(struct vmbus_xact_ctx *ctx, size_t req_len); void vmbus_xact_put(struct vmbus_xact *xact); void *vmbus_xact_req_data(const struct vmbus_xact *xact); bus_addr_t vmbus_xact_req_paddr(const struct vmbus_xact *xact); void *vmbus_xact_priv(const struct vmbus_xact *xact, size_t priv_len); void vmbus_xact_activate(struct vmbus_xact *xact); void vmbus_xact_deactivate(struct vmbus_xact *xact); const void *vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len); const void *vmbus_xact_busywait(struct vmbus_xact *xact, size_t *resp_len); +const void *vmbus_xact_poll(struct vmbus_xact *xact, + size_t *resp_len); void vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen); void vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, const void *data, size_t dlen); #endif /* !_VMBUS_XACT_H_ */ Index: projects/clang391-import/sys/dev/hyperv/netvsc/hn_nvs.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/netvsc/hn_nvs.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/netvsc/hn_nvs.c (revision 309263) @@ -1,706 +1,721 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Network Virtualization Service. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int hn_nvs_conn_chim(struct hn_softc *); static int hn_nvs_conn_rxbuf(struct hn_softc *); -static int hn_nvs_disconn_chim(struct hn_softc *); -static int hn_nvs_disconn_rxbuf(struct hn_softc *); +static void hn_nvs_disconn_chim(struct hn_softc *); +static void hn_nvs_disconn_rxbuf(struct hn_softc *); static int hn_nvs_conf_ndis(struct hn_softc *, int); static int hn_nvs_init_ndis(struct hn_softc *); static int hn_nvs_doinit(struct hn_softc *, uint32_t); static int hn_nvs_init(struct hn_softc *); static const void *hn_nvs_xact_execute(struct hn_softc *, struct vmbus_xact *, void *, int, size_t *, uint32_t); static void hn_nvs_sent_none(struct hn_nvs_sendctx *, struct hn_softc *, struct vmbus_channel *, const void *, int); struct hn_nvs_sendctx hn_nvs_sendctx_none = HN_NVS_SENDCTX_INITIALIZER(hn_nvs_sent_none, NULL); static const uint32_t hn_nvs_version[] = { HN_NVS_VERSION_5, HN_NVS_VERSION_4, HN_NVS_VERSION_2, HN_NVS_VERSION_1 }; static const void * hn_nvs_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, void *req, int reqlen, size_t *resplen0, uint32_t type) { struct hn_nvs_sendctx sndc; size_t resplen, min_resplen = *resplen0; const struct hn_nvs_hdr *hdr; int error; KASSERT(min_resplen >= sizeof(*hdr), ("invalid minimum response len %zu", min_resplen)); /* * Execute the xact setup by the caller. */ hn_nvs_sendctx_init(&sndc, hn_nvs_sent_xact, xact); vmbus_xact_activate(xact); error = hn_nvs_send(sc->hn_prichan, VMBUS_CHANPKT_FLAG_RC, req, reqlen, &sndc); if (error) { vmbus_xact_deactivate(xact); return (NULL); } - if (HN_CAN_SLEEP(sc)) - hdr = vmbus_xact_wait(xact, &resplen); - else - hdr = vmbus_xact_busywait(xact, &resplen); + hdr = vmbus_chan_xact_wait(sc->hn_prichan, xact, &resplen, + HN_CAN_SLEEP(sc)); /* * Check this NVS response message. */ if (resplen < min_resplen) { if_printf(sc->hn_ifp, "invalid NVS resp len %zu\n", resplen); return (NULL); } if (hdr->nvs_type != type) { if_printf(sc->hn_ifp, "unexpected NVS resp 0x%08x, " "expect 0x%08x\n", hdr->nvs_type, type); return (NULL); } /* All pass! */ *resplen0 = resplen; return (hdr); } static __inline int hn_nvs_req_send(struct hn_softc *sc, void *req, int reqlen) { return (hn_nvs_send(sc->hn_prichan, VMBUS_CHANPKT_FLAG_NONE, req, reqlen, &hn_nvs_sendctx_none)); } static int hn_nvs_conn_rxbuf(struct hn_softc *sc) { struct vmbus_xact *xact = NULL; struct hn_nvs_rxbuf_conn *conn; const struct hn_nvs_rxbuf_connresp *resp; size_t resp_len; uint32_t status; int error, rxbuf_size; /* * Limit RXBUF size for old NVS. */ if (sc->hn_nvs_ver <= HN_NVS_VERSION_2) rxbuf_size = HN_RXBUF_SIZE_COMPAT; else rxbuf_size = HN_RXBUF_SIZE; /* * Connect the RXBUF GPADL to the primary channel. * * NOTE: * Only primary channel has RXBUF connected to it. 
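
hn_nvs_xact_execute() above now defers the sleep-versus-spin decision to the new vmbus_chan_xact_wait(). A plausible minimal shape for that wrapper, assuming it simply dispatches on can_sleep the way the removed HN_CAN_SLEEP() branches did (the real helper may do more, e.g. cope with revoked channels):

const void *
vmbus_chan_xact_wait(const struct vmbus_channel *chan __unused,
    struct vmbus_xact *xact, size_t *resp_len, bool can_sleep)
{
	/* Sleepable contexts block; non-sleepable ones spin. */
	if (can_sleep)
		return (vmbus_xact_wait(xact, resp_len));
	return (vmbus_xact_busywait(xact, resp_len));
}
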
Sub-channels * just share this RXBUF. */ error = vmbus_chan_gpadl_connect(sc->hn_prichan, sc->hn_rxbuf_dma.hv_paddr, rxbuf_size, &sc->hn_rxbuf_gpadl); if (error) { if_printf(sc->hn_ifp, "rxbuf gpadl conn failed: %d\n", error); goto cleanup; } /* * Connect RXBUF to NVS. */ xact = vmbus_xact_get(sc->hn_xact, sizeof(*conn)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs rxbuf conn\n"); error = ENXIO; goto cleanup; } conn = vmbus_xact_req_data(xact); conn->nvs_type = HN_NVS_TYPE_RXBUF_CONN; conn->nvs_gpadl = sc->hn_rxbuf_gpadl; conn->nvs_sig = HN_NVS_RXBUF_SIG; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, conn, sizeof(*conn), &resp_len, HN_NVS_TYPE_RXBUF_CONNRESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec nvs rxbuf conn failed\n"); error = EIO; goto cleanup; } status = resp->nvs_status; vmbus_xact_put(xact); xact = NULL; if (status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "nvs rxbuf conn failed: %x\n", status); error = EIO; goto cleanup; } sc->hn_flags |= HN_FLAG_RXBUF_CONNECTED; return (0); cleanup: if (xact != NULL) vmbus_xact_put(xact); hn_nvs_disconn_rxbuf(sc); return (error); } static int hn_nvs_conn_chim(struct hn_softc *sc) { struct vmbus_xact *xact = NULL; struct hn_nvs_chim_conn *chim; const struct hn_nvs_chim_connresp *resp; size_t resp_len; uint32_t status, sectsz; int error; /* * Connect chimney sending buffer GPADL to the primary channel. * * NOTE: * Only primary channel has chimney sending buffer connected to it. * Sub-channels just share this chimney sending buffer. */ error = vmbus_chan_gpadl_connect(sc->hn_prichan, sc->hn_chim_dma.hv_paddr, HN_CHIM_SIZE, &sc->hn_chim_gpadl); if (error) { if_printf(sc->hn_ifp, "chim gpadl conn failed: %d\n", error); goto cleanup; } /* * Connect chimney sending buffer to NVS */ xact = vmbus_xact_get(sc->hn_xact, sizeof(*chim)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs chim conn\n"); error = ENXIO; goto cleanup; } chim = vmbus_xact_req_data(xact); chim->nvs_type = HN_NVS_TYPE_CHIM_CONN; chim->nvs_gpadl = sc->hn_chim_gpadl; chim->nvs_sig = HN_NVS_CHIM_SIG; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, chim, sizeof(*chim), &resp_len, HN_NVS_TYPE_CHIM_CONNRESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec nvs chim conn failed\n"); error = EIO; goto cleanup; } status = resp->nvs_status; sectsz = resp->nvs_sectsz; vmbus_xact_put(xact); xact = NULL; if (status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "nvs chim conn failed: %x\n", status); error = EIO; goto cleanup; } if (sectsz == 0) { + /* + * Can't use chimney sending buffer; done! + */ if_printf(sc->hn_ifp, "zero chimney sending buffer " "section size\n"); + sc->hn_chim_szmax = 0; + sc->hn_chim_cnt = 0; + sc->hn_flags |= HN_FLAG_CHIM_CONNECTED; return (0); } sc->hn_chim_szmax = sectsz; sc->hn_chim_cnt = HN_CHIM_SIZE / sc->hn_chim_szmax; if (HN_CHIM_SIZE % sc->hn_chim_szmax != 0) { if_printf(sc->hn_ifp, "chimney sending sections are " "not properly aligned\n"); } if (sc->hn_chim_cnt % LONG_BIT != 0) { if_printf(sc->hn_ifp, "discard %d chimney sending sections\n", sc->hn_chim_cnt % LONG_BIT); } sc->hn_chim_bmap_cnt = sc->hn_chim_cnt / LONG_BIT; sc->hn_chim_bmap = malloc(sc->hn_chim_bmap_cnt * sizeof(u_long), M_DEVBUF, M_WAITOK | M_ZERO); /* Done! 
*/ sc->hn_flags |= HN_FLAG_CHIM_CONNECTED; if (bootverbose) { if_printf(sc->hn_ifp, "chimney sending buffer %d/%d\n", sc->hn_chim_szmax, sc->hn_chim_cnt); } return (0); cleanup: if (xact != NULL) vmbus_xact_put(xact); hn_nvs_disconn_chim(sc); return (error); } -static int +static void hn_nvs_disconn_rxbuf(struct hn_softc *sc) { int error; if (sc->hn_flags & HN_FLAG_RXBUF_CONNECTED) { struct hn_nvs_rxbuf_disconn disconn; /* * Disconnect RXBUF from NVS. */ memset(&disconn, 0, sizeof(disconn)); disconn.nvs_type = HN_NVS_TYPE_RXBUF_DISCONN; disconn.nvs_sig = HN_NVS_RXBUF_SIG; /* NOTE: No response. */ error = hn_nvs_req_send(sc, &disconn, sizeof(disconn)); if (error) { if_printf(sc->hn_ifp, "send nvs rxbuf disconn failed: %d\n", error); - return (error); + /* + * Fine for a revoked channel, since the hypervisor + * does not drain TX bufring for a revoked channel. + */ + if (!vmbus_chan_is_revoked(sc->hn_prichan)) + sc->hn_flags |= HN_FLAG_RXBUF_REF; } sc->hn_flags &= ~HN_FLAG_RXBUF_CONNECTED; /* * Wait for the hypervisor to receive this NVS request. * * NOTE: * The TX bufring will not be drained by the hypervisor, * if the primary channel is revoked. */ while (!vmbus_chan_tx_empty(sc->hn_prichan) && !vmbus_chan_is_revoked(sc->hn_prichan)) pause("waittx", 1); /* * Linger long enough for NVS to disconnect RXBUF. */ pause("lingtx", (200 * hz) / 1000); } if (sc->hn_rxbuf_gpadl != 0) { /* * Disconnect RXBUF from primary channel. */ error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, sc->hn_rxbuf_gpadl); if (error) { if_printf(sc->hn_ifp, "rxbuf gpadl disconn failed: %d\n", error); - return (error); + sc->hn_flags |= HN_FLAG_RXBUF_REF; } sc->hn_rxbuf_gpadl = 0; } - return (0); } -static int +static void hn_nvs_disconn_chim(struct hn_softc *sc) { int error; if (sc->hn_flags & HN_FLAG_CHIM_CONNECTED) { struct hn_nvs_chim_disconn disconn; /* * Disconnect chimney sending buffer from NVS. */ memset(&disconn, 0, sizeof(disconn)); disconn.nvs_type = HN_NVS_TYPE_CHIM_DISCONN; disconn.nvs_sig = HN_NVS_CHIM_SIG; /* NOTE: No response. */ error = hn_nvs_req_send(sc, &disconn, sizeof(disconn)); if (error) { if_printf(sc->hn_ifp, "send nvs chim disconn failed: %d\n", error); - return (error); + /* + * Fine for a revoked channel, since the hypervisor + * does not drain TX bufring for a revoked channel. + */ + if (!vmbus_chan_is_revoked(sc->hn_prichan)) + sc->hn_flags |= HN_FLAG_CHIM_REF; } sc->hn_flags &= ~HN_FLAG_CHIM_CONNECTED; /* * Wait for the hypervisor to receive this NVS request. * * NOTE: * The TX bufring will not be drained by the hypervisor, * if the primary channel is revoked. */ while (!vmbus_chan_tx_empty(sc->hn_prichan) && !vmbus_chan_is_revoked(sc->hn_prichan)) pause("waittx", 1); /* * Linger long enough for NVS to disconnect chimney * sending buffer. */ pause("lingtx", (200 * hz) / 1000); } if (sc->hn_chim_gpadl != 0) { /* * Disconnect chimney sending buffer from primary channel. 
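
Both disconnect paths above now degrade gracefully instead of failing outright, and they share the same teardown idiom. As a standalone sketch (the helper name is hypothetical; the body mirrors the code above):

static void
example_nvs_teardown_wait(struct hn_softc *sc)
{
	/*
	 * Wait for the host to pick up the disconnect request; a revoked
	 * primary channel never drains its TX bufring, so don't spin on one.
	 */
	while (!vmbus_chan_tx_empty(sc->hn_prichan) &&
	    !vmbus_chan_is_revoked(sc->hn_prichan))
		pause("waittx", 1);
	/* Linger ~200ms so NVS can finish the disconnect on the host side. */
	pause("lingtx", (200 * hz) / 1000);
}
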
*/ error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, sc->hn_chim_gpadl); if (error) { if_printf(sc->hn_ifp, "chim gpadl disconn failed: %d\n", error); - return (error); + sc->hn_flags |= HN_FLAG_CHIM_REF; } sc->hn_chim_gpadl = 0; } if (sc->hn_chim_bmap != NULL) { free(sc->hn_chim_bmap, M_DEVBUF); sc->hn_chim_bmap = NULL; + sc->hn_chim_bmap_cnt = 0; } - return (0); } static int hn_nvs_doinit(struct hn_softc *sc, uint32_t nvs_ver) { struct vmbus_xact *xact; struct hn_nvs_init *init; const struct hn_nvs_init_resp *resp; size_t resp_len; uint32_t status; xact = vmbus_xact_get(sc->hn_xact, sizeof(*init)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs init\n"); return (ENXIO); } init = vmbus_xact_req_data(xact); init->nvs_type = HN_NVS_TYPE_INIT; init->nvs_ver_min = nvs_ver; init->nvs_ver_max = nvs_ver; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, init, sizeof(*init), &resp_len, HN_NVS_TYPE_INIT_RESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec init failed\n"); vmbus_xact_put(xact); return (EIO); } status = resp->nvs_status; vmbus_xact_put(xact); if (status != HN_NVS_STATUS_OK) { if (bootverbose) { /* * Caller may try another NVS version, and will log * error if there are no more NVS versions to try, * so don't bark out loud here. */ if_printf(sc->hn_ifp, "nvs init failed for ver 0x%x\n", nvs_ver); } return (EINVAL); } return (0); } /* * Configure MTU and enable VLAN. */ static int hn_nvs_conf_ndis(struct hn_softc *sc, int mtu) { struct hn_nvs_ndis_conf conf; int error; memset(&conf, 0, sizeof(conf)); conf.nvs_type = HN_NVS_TYPE_NDIS_CONF; conf.nvs_mtu = mtu; conf.nvs_caps = HN_NVS_NDIS_CONF_VLAN; /* NOTE: No response. */ error = hn_nvs_req_send(sc, &conf, sizeof(conf)); if (error) { if_printf(sc->hn_ifp, "send nvs ndis conf failed: %d\n", error); return (error); } if (bootverbose) if_printf(sc->hn_ifp, "nvs ndis conf done\n"); sc->hn_caps |= HN_CAP_MTU | HN_CAP_VLAN; return (0); } static int hn_nvs_init_ndis(struct hn_softc *sc) { struct hn_nvs_ndis_init ndis; int error; memset(&ndis, 0, sizeof(ndis)); ndis.nvs_type = HN_NVS_TYPE_NDIS_INIT; ndis.nvs_ndis_major = HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver); ndis.nvs_ndis_minor = HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver); /* NOTE: No response. */ error = hn_nvs_req_send(sc, &ndis, sizeof(ndis)); if (error) if_printf(sc->hn_ifp, "send nvs ndis init failed: %d\n", error); return (error); } static int hn_nvs_init(struct hn_softc *sc) { int i, error; if (device_is_attached(sc->hn_dev)) { /* * NVS version and NDIS version MUST NOT be changed. */ if (bootverbose) { if_printf(sc->hn_ifp, "reinit NVS version 0x%x, " "NDIS version %u.%u\n", sc->hn_nvs_ver, HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); } error = hn_nvs_doinit(sc, sc->hn_nvs_ver); if (error) { if_printf(sc->hn_ifp, "reinit NVS version 0x%x " "failed: %d\n", sc->hn_nvs_ver, error); return (error); } goto done; } /* * Find the supported NVS version and set NDIS version accordingly. */ for (i = 0; i < nitems(hn_nvs_version); ++i) { error = hn_nvs_doinit(sc, hn_nvs_version[i]); if (!error) { sc->hn_nvs_ver = hn_nvs_version[i]; /* Set NDIS version according to NVS version. 
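 * (With the version table above this works out to: NVS 5 selects
 * NDIS 6.30, while NVS 4, 2 and 1 all fall back to NDIS 6.1.)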
*/ sc->hn_ndis_ver = HN_NDIS_VERSION_6_30; if (sc->hn_nvs_ver <= HN_NVS_VERSION_4) sc->hn_ndis_ver = HN_NDIS_VERSION_6_1; if (bootverbose) { if_printf(sc->hn_ifp, "NVS version 0x%x, " "NDIS version %u.%u\n", sc->hn_nvs_ver, HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); } goto done; } } if_printf(sc->hn_ifp, "no NVS available\n"); return (ENXIO); done: if (sc->hn_nvs_ver >= HN_NVS_VERSION_5) sc->hn_caps |= HN_CAP_HASHVAL; return (0); } int hn_nvs_attach(struct hn_softc *sc, int mtu) { int error; /* * Initialize NVS. */ error = hn_nvs_init(sc); if (error) return (error); if (sc->hn_nvs_ver >= HN_NVS_VERSION_2) { /* * Configure NDIS before initializing it. */ error = hn_nvs_conf_ndis(sc, mtu); if (error) return (error); } /* * Initialize NDIS. */ error = hn_nvs_init_ndis(sc); if (error) return (error); /* * Connect RXBUF. */ error = hn_nvs_conn_rxbuf(sc); if (error) return (error); /* * Connect chimney sending buffer. */ error = hn_nvs_conn_chim(sc); - if (error) + if (error) { + hn_nvs_disconn_rxbuf(sc); return (error); + } return (0); } void hn_nvs_detach(struct hn_softc *sc) { /* NOTE: there are no requests to stop the NVS. */ hn_nvs_disconn_rxbuf(sc); hn_nvs_disconn_chim(sc); } void hn_nvs_sent_xact(struct hn_nvs_sendctx *sndc, struct hn_softc *sc __unused, struct vmbus_channel *chan __unused, const void *data, int dlen) { vmbus_xact_wakeup(sndc->hn_cbarg, data, dlen); } static void hn_nvs_sent_none(struct hn_nvs_sendctx *sndc __unused, struct hn_softc *sc __unused, struct vmbus_channel *chan __unused, const void *data __unused, int dlen __unused) { /* EMPTY */ } int hn_nvs_alloc_subchans(struct hn_softc *sc, int *nsubch0) { struct vmbus_xact *xact; struct hn_nvs_subch_req *req; const struct hn_nvs_subch_resp *resp; int error, nsubch_req; uint32_t nsubch; size_t resp_len; nsubch_req = *nsubch0; KASSERT(nsubch_req > 0, ("invalid # of sub-channels %d", nsubch_req)); xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs subch alloc\n"); return (ENXIO); } req = vmbus_xact_req_data(xact); req->nvs_type = HN_NVS_TYPE_SUBCH_REQ; req->nvs_op = HN_NVS_SUBCH_OP_ALLOC; req->nvs_nsubch = nsubch_req; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, req, sizeof(*req), &resp_len, HN_NVS_TYPE_SUBCH_RESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec nvs subch alloc failed\n"); error = EIO; goto done; } if (resp->nvs_status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "nvs subch alloc failed: %x\n", resp->nvs_status); error = EIO; goto done; } nsubch = resp->nvs_nsubch; if (nsubch > nsubch_req) { if_printf(sc->hn_ifp, "%u subchans are allocated, " "requested %d\n", nsubch, nsubch_req); nsubch = nsubch_req; } *nsubch0 = nsubch; error = 0; done: vmbus_xact_put(xact); return (error); } int hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan, struct hn_nvs_sendctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt) { return hn_nvs_send_rndis_sglist(chan, HN_NVS_RNDIS_MTYPE_CTRL, sndc, gpa, gpa_cnt); } Index: projects/clang391-import/sys/dev/hyperv/netvsc/hn_rndis.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/netvsc/hn_rndis.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/netvsc/hn_rndis.c (revision 309263) @@ -1,996 +1,993 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HN_RNDIS_RID_COMPAT_MASK 0xffff #define HN_RNDIS_RID_COMPAT_MAX HN_RNDIS_RID_COMPAT_MASK #define HN_RNDIS_XFER_SIZE 2048 #define HN_NDIS_TXCSUM_CAP_IP4 \ (NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT) #define HN_NDIS_TXCSUM_CAP_TCP4 \ (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT) #define HN_NDIS_TXCSUM_CAP_TCP6 \ (NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \ NDIS_TXCSUM_CAP_IP6EXT) #define HN_NDIS_TXCSUM_CAP_UDP6 \ (NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT) #define HN_NDIS_LSOV2_CAP_IP6 \ (NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT) static const void *hn_rndis_xact_exec1(struct hn_softc *, struct vmbus_xact *, size_t, struct hn_nvs_sendctx *, size_t *); static const void *hn_rndis_xact_execute(struct hn_softc *, struct vmbus_xact *, uint32_t, size_t, size_t *, uint32_t); static int hn_rndis_query(struct hn_softc *, uint32_t, const void *, size_t, void *, size_t *); static int hn_rndis_query2(struct hn_softc *, uint32_t, const void *, size_t, void *, size_t *, size_t); static int hn_rndis_set(struct hn_softc *, uint32_t, const void *, size_t); static int hn_rndis_init(struct hn_softc *); static int hn_rndis_halt(struct hn_softc *); static int hn_rndis_conf_offload(struct hn_softc *, int); static int hn_rndis_query_hwcaps(struct hn_softc *, struct ndis_offload *); static __inline uint32_t hn_rndis_rid(struct hn_softc *sc) { uint32_t rid; again: rid = atomic_fetchadd_int(&sc->hn_rndis_rid, 1); if (rid == 0) goto again; /* Use upper 16 bits for non-compat RNDIS messages. 
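 * E.g. a counter value of 1 becomes rid 0x00010000, which is safely
 * above HN_RNDIS_RID_COMPAT_MAX (0xffff); the low 16 bits remain
 * reserved for compat requests.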
*/ return ((rid & 0xffff) << 16); } void hn_rndis_rx_ctrl(struct hn_softc *sc, const void *data, int dlen) { const struct rndis_comp_hdr *comp; const struct rndis_msghdr *hdr; KASSERT(dlen >= sizeof(*hdr), ("invalid RNDIS msg\n")); hdr = data; switch (hdr->rm_type) { case REMOTE_NDIS_INITIALIZE_CMPLT: case REMOTE_NDIS_QUERY_CMPLT: case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: /* unused */ if (dlen < sizeof(*comp)) { if_printf(sc->hn_ifp, "invalid RNDIS cmplt\n"); return; } comp = data; KASSERT(comp->rm_rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid RNDIS rid 0x%08x\n", comp->rm_rid)); vmbus_xact_ctx_wakeup(sc->hn_xact, comp, dlen); break; case REMOTE_NDIS_RESET_CMPLT: /* * Reset completed, no rid. * * NOTE: * RESET is not issued by hn(4), so this message should * _not_ be observed. */ if_printf(sc->hn_ifp, "RESET cmplt received\n"); break; default: if_printf(sc->hn_ifp, "unknown RNDIS msg 0x%x\n", hdr->rm_type); break; } } int hn_rndis_get_eaddr(struct hn_softc *sc, uint8_t *eaddr) { size_t eaddr_len; int error; eaddr_len = ETHER_ADDR_LEN; error = hn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, NULL, 0, eaddr, &eaddr_len); if (error) return (error); if (eaddr_len != ETHER_ADDR_LEN) { if_printf(sc->hn_ifp, "invalid eaddr len %zu\n", eaddr_len); return (EINVAL); } return (0); } int hn_rndis_get_linkstatus(struct hn_softc *sc, uint32_t *link_status) { size_t size; int error; size = sizeof(*link_status); error = hn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0, link_status, &size); if (error) return (error); if (size != sizeof(uint32_t)) { if_printf(sc->hn_ifp, "invalid link status len %zu\n", size); return (EINVAL); } return (0); } static const void * hn_rndis_xact_exec1(struct hn_softc *sc, struct vmbus_xact *xact, size_t reqlen, struct hn_nvs_sendctx *sndc, size_t *comp_len) { struct vmbus_gpa gpa[HN_XACT_REQ_PGCNT]; int gpa_cnt, error; bus_addr_t paddr; KASSERT(reqlen <= HN_XACT_REQ_SIZE && reqlen > 0, ("invalid request length %zu", reqlen)); /* * Setup the SG list. */ paddr = vmbus_xact_req_paddr(xact); KASSERT((paddr & PAGE_MASK) == 0, ("vmbus xact request is not page aligned 0x%jx", (uintmax_t)paddr)); for (gpa_cnt = 0; gpa_cnt < HN_XACT_REQ_PGCNT; ++gpa_cnt) { int len = PAGE_SIZE; if (reqlen == 0) break; if (reqlen < len) len = reqlen; gpa[gpa_cnt].gpa_page = atop(paddr) + gpa_cnt; gpa[gpa_cnt].gpa_len = len; gpa[gpa_cnt].gpa_ofs = 0; reqlen -= len; } KASSERT(reqlen == 0, ("still have %zu request data left", reqlen)); /* * Send this RNDIS control message and wait for its completion * message. */ vmbus_xact_activate(xact); error = hn_nvs_send_rndis_ctrl(sc->hn_prichan, sndc, gpa, gpa_cnt); if (error) { vmbus_xact_deactivate(xact); if_printf(sc->hn_ifp, "RNDIS ctrl send failed: %d\n", error); return (NULL); } - if (HN_CAN_SLEEP(sc)) - return (vmbus_xact_wait(xact, comp_len)); - else - return (vmbus_xact_busywait(xact, comp_len)); + return (vmbus_chan_xact_wait(sc->hn_prichan, xact, comp_len, + HN_CAN_SLEEP(sc))); } static const void * hn_rndis_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, uint32_t rid, size_t reqlen, size_t *comp_len0, uint32_t comp_type) { const struct rndis_comp_hdr *comp; size_t comp_len, min_complen = *comp_len0; KASSERT(rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid rid %u\n", rid)); KASSERT(min_complen >= sizeof(*comp), ("invalid minimum complete len %zu", min_complen)); /* * Execute the xact setup by the caller. 
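 * (hn_rndis_xact_exec1() above chops the page-aligned request into
 * per-page vmbus_gpa entries; e.g. a 6000-byte request on 4 KiB pages
 * yields two entries of 4096 and 1904 bytes, both at offset 0.)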
*/ comp = hn_rndis_xact_exec1(sc, xact, reqlen, &hn_nvs_sendctx_none, &comp_len); if (comp == NULL) return (NULL); /* * Check this RNDIS complete message. */ if (comp_len < min_complen) { if (comp_len >= sizeof(*comp)) { /* rm_status field is valid */ if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu, " "status 0x%08x\n", comp_len, comp->rm_status); } else { if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu\n", comp_len); } return (NULL); } if (comp->rm_len < min_complen) { if_printf(sc->hn_ifp, "invalid RNDIS comp msglen %u\n", comp->rm_len); return (NULL); } if (comp->rm_type != comp_type) { if_printf(sc->hn_ifp, "unexpected RNDIS comp 0x%08x, " "expect 0x%08x\n", comp->rm_type, comp_type); return (NULL); } if (comp->rm_rid != rid) { if_printf(sc->hn_ifp, "RNDIS comp rid mismatch %u, " "expect %u\n", comp->rm_rid, rid); return (NULL); } /* All pass! */ *comp_len0 = comp_len; return (comp); } static int hn_rndis_query(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0) { return (hn_rndis_query2(sc, oid, idata, idlen, odata, odlen0, *odlen0)); } static int hn_rndis_query2(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0, size_t min_odlen) { struct rndis_query_req *req; const struct rndis_query_comp *comp; struct vmbus_xact *xact; size_t reqlen, odlen = *odlen0, comp_len; int error, ofs; uint32_t rid; reqlen = sizeof(*req) + idlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS query 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_QUERY_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; /* * XXX * This is _not_ RNDIS Spec conforming: * "This MUST be set to 0 when there is no input data * associated with the OID." * * If this field was set to 0 according to the RNDIS Spec, * Hyper-V would set non-SUCCESS status in the query * completion. */ req->rm_infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET; if (idlen > 0) { req->rm_infobuflen = idlen; /* Input data immediately follows RNDIS query. */ memcpy(req + 1, idata, idlen); } comp_len = sizeof(*comp) + min_odlen; comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_QUERY_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS query 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS query 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } if (comp->rm_infobuflen == 0 || comp->rm_infobufoffset == 0) { /* No output data! */ if_printf(sc->hn_ifp, "RNDIS query 0x%08x, no data\n", oid); *odlen0 = 0; error = 0; goto done; } /* * Check output data length and offset. */ /* ofs is the offset from the beginning of comp. */ ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->rm_infobufoffset); if (ofs < sizeof(*comp) || ofs + comp->rm_infobuflen > comp_len) { if_printf(sc->hn_ifp, "RNDIS query invalid comp ib off/len, " "%u/%u\n", comp->rm_infobufoffset, comp->rm_infobuflen); error = EINVAL; goto done; } /* * Save output data. 
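 * (At most the caller-supplied *odlen0 bytes are copied out; a longer
 * completion is silently truncated, and *odlen0 is updated to the
 * shorter of the two lengths.)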
*/ if (comp->rm_infobuflen < odlen) odlen = comp->rm_infobuflen; memcpy(odata, ((const uint8_t *)comp) + ofs, odlen); *odlen0 = odlen; error = 0; done: vmbus_xact_put(xact); return (error); } int hn_rndis_query_rsscaps(struct hn_softc *sc, int *rxr_cnt0) { struct ndis_rss_caps in, caps; size_t caps_len; int error, indsz, rxr_cnt, hash_fnidx; uint32_t hash_func = 0, hash_types = 0; *rxr_cnt0 = 0; if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_20) return (EOPNOTSUPP); memset(&in, 0, sizeof(in)); in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS; in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2; in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE; caps_len = NDIS_RSS_CAPS_SIZE; error = hn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES, &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0); if (error) return (error); /* * Preliminary verification. */ if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) { if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n", caps.ndis_hdr.ndis_type); return (EINVAL); } if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) { if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n", caps.ndis_hdr.ndis_rev); return (EINVAL); } if (caps.ndis_hdr.ndis_size > caps_len) { if_printf(sc->hn_ifp, "invalid NDIS objsize %u, " "data size %zu\n", caps.ndis_hdr.ndis_size, caps_len); return (EINVAL); } else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) { if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n", caps.ndis_hdr.ndis_size); return (EINVAL); } /* * Save information for later RSS configuration. */ if (caps.ndis_nrxr == 0) { if_printf(sc->hn_ifp, "0 RX rings!?\n"); return (EINVAL); } if (bootverbose) if_printf(sc->hn_ifp, "%u RX rings\n", caps.ndis_nrxr); rxr_cnt = caps.ndis_nrxr; if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE && caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) { if (caps.ndis_nind > NDIS_HASH_INDCNT) { if_printf(sc->hn_ifp, "too many RSS indirect table entries %u\n", caps.ndis_nind); return (EOPNOTSUPP); } if (!powerof2(caps.ndis_nind)) { if_printf(sc->hn_ifp, "RSS indirect table size is not " "power-of-2 %u\n", caps.ndis_nind); } if (bootverbose) { if_printf(sc->hn_ifp, "RSS indirect table size %u\n", caps.ndis_nind); } indsz = caps.ndis_nind; } else { indsz = NDIS_HASH_INDCNT; } if (indsz < rxr_cnt) { if_printf(sc->hn_ifp, "# of RX rings (%d) > " "RSS indirect table size %d\n", rxr_cnt, indsz); rxr_cnt = indsz; } /* * NOTE: * Toeplitz is at the lowest bit, and it is preferred; so ffs(), * instead of fls(), is used here. */ hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK); if (hash_fnidx == 0) { if_printf(sc->hn_ifp, "no hash functions, caps 0x%08x\n", caps.ndis_caps); return (EOPNOTSUPP); } hash_func = 1 << (hash_fnidx - 1); /* ffs is 1-based */ if (caps.ndis_caps & NDIS_RSS_CAP_IPV4) hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4; if (caps.ndis_caps & NDIS_RSS_CAP_IPV6) hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6; if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX) hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX; if (hash_types == 0) { if_printf(sc->hn_ifp, "no hash types, caps 0x%08x\n", caps.ndis_caps); return (EOPNOTSUPP); } /* Commit!
*/ sc->hn_rss_ind_size = indsz; sc->hn_rss_hash = hash_func | hash_types; *rxr_cnt0 = rxr_cnt; return (0); } static int hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen) { struct rndis_set_req *req; const struct rndis_set_comp *comp; struct vmbus_xact *xact; size_t reqlen, comp_len; uint32_t rid; int error; KASSERT(dlen > 0, ("invalid dlen %zu", dlen)); reqlen = sizeof(*req) + dlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS set 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_SET_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; req->rm_infobuflen = dlen; req->rm_infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET; /* Data immediately follows RNDIS set. */ memcpy(req + 1, data, dlen); comp_len = sizeof(*comp); comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_SET_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS set 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS set 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } error = 0; done: vmbus_xact_put(xact); return (error); } static int hn_rndis_conf_offload(struct hn_softc *sc, int mtu) { struct ndis_offload hwcaps; struct ndis_offload_params params; uint32_t caps = 0; size_t paramsz; int error, tso_maxsz, tso_minsg; error = hn_rndis_query_hwcaps(sc, &hwcaps); if (error) { if_printf(sc->hn_ifp, "hwcaps query failed: %d\n", error); return (error); } /* NOTE: 0 means "no change" */ memset(¶ms, 0, sizeof(params)); params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT; if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_30) { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2; paramsz = NDIS_OFFLOAD_PARAMS_SIZE_6_1; } else { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3; paramsz = NDIS_OFFLOAD_PARAMS_SIZE; } params.ndis_hdr.ndis_size = paramsz; /* * TSO4/TSO6 setup. 
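 * (The logic below starts from IP_MAXPACKET and a 2-segment minimum,
 * then tightens both toward what the host advertises; e.g. an
 * advertised ip4_maxsz of 62780 with minsg 2 and MTU 1500 keeps TSO4
 * enabled, since 62780 >= 2 * 1500.)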
*/ tso_maxsz = IP_MAXPACKET; tso_minsg = 2; if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) { caps |= HN_CAP_TSO4; params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON; if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz) tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz; if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg) tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg; } if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) && (hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) == HN_NDIS_LSOV2_CAP_IP6) { caps |= HN_CAP_TSO6; params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON; if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz) tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz; if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg) tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg; } sc->hn_ndis_tso_szmax = 0; sc->hn_ndis_tso_sgmin = 0; if (caps & (HN_CAP_TSO4 | HN_CAP_TSO6)) { KASSERT(tso_maxsz <= IP_MAXPACKET, ("invalid NDIS TSO maxsz %d", tso_maxsz)); KASSERT(tso_minsg >= 2, ("invalid NDIS TSO minsg %d", tso_minsg)); if (tso_maxsz < tso_minsg * mtu) { if_printf(sc->hn_ifp, "invalid NDIS TSO config: " "maxsz %d, minsg %d, mtu %d; " "disable TSO4 and TSO6\n", tso_maxsz, tso_minsg, mtu); caps &= ~(HN_CAP_TSO4 | HN_CAP_TSO6); params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF; params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF; } else { sc->hn_ndis_tso_szmax = tso_maxsz; sc->hn_ndis_tso_sgmin = tso_minsg; if (bootverbose) { if_printf(sc->hn_ifp, "NDIS TSO " "szmax %d sgmin %d\n", sc->hn_ndis_tso_szmax, sc->hn_ndis_tso_sgmin); } } } /* IPv4 checksum */ if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4) == HN_NDIS_TXCSUM_CAP_IP4) { caps |= HN_CAP_IPCS; params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) { if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX; } /* TCP4 checksum */ if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4) == HN_NDIS_TXCSUM_CAP_TCP4) { caps |= HN_CAP_TCP4CS; params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) { if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX; } /* UDP4 checksum */ if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { caps |= HN_CAP_UDP4CS; params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) { if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX; } /* TCP6 checksum */ if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_TCP6) == HN_NDIS_TXCSUM_CAP_TCP6) { caps |= HN_CAP_TCP6CS; params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) { if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX; } /* UDP6 checksum */ if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_UDP6) == HN_NDIS_TXCSUM_CAP_UDP6) { caps |= HN_CAP_UDP6CS; params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) { if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX; } if (bootverbose) { if_printf(sc->hn_ifp, 
"offload csum: " "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n", params.ndis_ip4csum, params.ndis_tcp4csum, params.ndis_udp4csum, params.ndis_tcp6csum, params.ndis_udp6csum); if_printf(sc->hn_ifp, "offload lsov2: ip4 %u, ip6 %u\n", params.ndis_lsov2_ip4, params.ndis_lsov2_ip6); } error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, ¶ms, paramsz); if (error) { if_printf(sc->hn_ifp, "offload config failed: %d\n", error); return (error); } if (bootverbose) if_printf(sc->hn_ifp, "offload config done\n"); sc->hn_caps |= caps; return (0); } int hn_rndis_conf_rss(struct hn_softc *sc, uint16_t flags) { struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; struct ndis_rss_params *prm = &rss->rss_params; int error, rss_size; /* * Only NDIS 6.20+ is supported: * We only support 4bytes element in indirect table, which has been * adopted since NDIS 6.20. */ KASSERT(sc->hn_ndis_ver >= HN_NDIS_VERSION_6_20, ("NDIS 6.20+ is required, NDIS version 0x%08x", sc->hn_ndis_ver)); /* XXX only one can be specified through, popcnt? */ KASSERT((sc->hn_rss_hash & NDIS_HASH_FUNCTION_MASK), ("no hash func")); KASSERT((sc->hn_rss_hash & NDIS_HASH_TYPE_MASK), ("no hash types")); KASSERT(sc->hn_rss_ind_size > 0, ("no indirect table size")); if (bootverbose) { if_printf(sc->hn_ifp, "RSS indirect table size %d, " "hash 0x%08x\n", sc->hn_rss_ind_size, sc->hn_rss_hash); } /* * NOTE: * DO NOT whack rss_key and rss_ind, which are setup by the caller. */ memset(prm, 0, sizeof(*prm)); rss_size = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->hn_rss_ind_size); prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS; prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2; prm->ndis_hdr.ndis_size = rss_size; prm->ndis_flags = flags; prm->ndis_hash = sc->hn_rss_hash; prm->ndis_indsize = sizeof(rss->rss_ind[0]) * sc->hn_rss_ind_size; prm->ndis_indoffset = __offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]); prm->ndis_keysize = sizeof(rss->rss_key); prm->ndis_keyoffset = __offsetof(struct ndis_rssprm_toeplitz, rss_key[0]); error = hn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS, rss, rss_size); if (error) { if_printf(sc->hn_ifp, "RSS config failed: %d\n", error); } else { if (bootverbose) if_printf(sc->hn_ifp, "RSS config done\n"); } return (error); } int hn_rndis_set_rxfilter(struct hn_softc *sc, uint32_t filter) { int error; error = hn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER, &filter, sizeof(filter)); if (error) { if_printf(sc->hn_ifp, "set RX filter 0x%08x failed: %d\n", filter, error); } else { if (bootverbose) { if_printf(sc->hn_ifp, "set RX filter 0x%08x done\n", filter); } } return (error); } static int hn_rndis_init(struct hn_softc *sc) { struct rndis_init_req *req; const struct rndis_init_comp *comp; struct vmbus_xact *xact; size_t comp_len; uint32_t rid; int error; xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS init\n"); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_INITIALIZE_MSG; req->rm_len = sizeof(*req); req->rm_rid = rid; req->rm_ver_major = RNDIS_VERSION_MAJOR; req->rm_ver_minor = RNDIS_VERSION_MINOR; req->rm_max_xfersz = HN_RNDIS_XFER_SIZE; comp_len = RNDIS_INIT_COMP_SIZE_MIN; comp = hn_rndis_xact_execute(sc, xact, rid, sizeof(*req), &comp_len, REMOTE_NDIS_INITIALIZE_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS init failed\n"); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS init failed: status 0x%08x\n", comp->rm_status); error = EIO; goto done; } 
sc->hn_rndis_agg_size = comp->rm_pktmaxsz; sc->hn_rndis_agg_pkts = comp->rm_pktmaxcnt; sc->hn_rndis_agg_align = 1U << comp->rm_align; if (bootverbose) { if_printf(sc->hn_ifp, "RNDIS ver %u.%u, pktsz %u, pktcnt %u, " "align %u\n", comp->rm_ver_major, comp->rm_ver_minor, sc->hn_rndis_agg_size, sc->hn_rndis_agg_pkts, sc->hn_rndis_agg_align); } error = 0; done: vmbus_xact_put(xact); return (error); } static int hn_rndis_halt(struct hn_softc *sc) { struct vmbus_xact *xact; struct rndis_halt_req *halt; struct hn_nvs_sendctx sndc; size_t comp_len; xact = vmbus_xact_get(sc->hn_xact, sizeof(*halt)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS halt\n"); return (ENXIO); } halt = vmbus_xact_req_data(xact); halt->rm_type = REMOTE_NDIS_HALT_MSG; halt->rm_len = sizeof(*halt); halt->rm_rid = hn_rndis_rid(sc); /* No RNDIS completion; rely on NVS message send completion */ hn_nvs_sendctx_init(&sndc, hn_nvs_sent_xact, xact); hn_rndis_xact_exec1(sc, xact, sizeof(*halt), &sndc, &comp_len); vmbus_xact_put(xact); if (bootverbose) if_printf(sc->hn_ifp, "RNDIS halt done\n"); return (0); } static int hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps) { struct ndis_offload in; size_t caps_len, size; int error; memset(&in, 0, sizeof(in)); in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD; if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_30) { in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3; size = NDIS_OFFLOAD_SIZE; } else if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_1) { in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2; size = NDIS_OFFLOAD_SIZE_6_1; } else { in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1; size = NDIS_OFFLOAD_SIZE_6_0; } in.ndis_hdr.ndis_size = size; caps_len = NDIS_OFFLOAD_SIZE; error = hn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, &in, size, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0); if (error) return (error); /* * Preliminary verification. */ if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) { if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n", caps->ndis_hdr.ndis_type); return (EINVAL); } if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) { if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n", caps->ndis_hdr.ndis_rev); return (EINVAL); } if (caps->ndis_hdr.ndis_size > caps_len) { if_printf(sc->hn_ifp, "invalid NDIS objsize %u, " "data size %zu\n", caps->ndis_hdr.ndis_size, caps_len); return (EINVAL); } else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) { if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n", caps->ndis_hdr.ndis_size); return (EINVAL); } if (bootverbose) { /* * NOTE: * caps->ndis_hdr.ndis_size MUST be checked before accessing * NDIS 6.1+ specific fields. */ if_printf(sc->hn_ifp, "hwcaps rev %u\n", caps->ndis_hdr.ndis_rev); if_printf(sc->hn_ifp, "hwcaps csum: " "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, " "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n", caps->ndis_csum.ndis_ip4_txcsum, caps->ndis_csum.ndis_ip4_txenc, caps->ndis_csum.ndis_ip4_rxcsum, caps->ndis_csum.ndis_ip4_rxenc, caps->ndis_csum.ndis_ip6_txcsum, caps->ndis_csum.ndis_ip6_txenc, caps->ndis_csum.ndis_ip6_rxcsum, caps->ndis_csum.ndis_ip6_rxenc); if_printf(sc->hn_ifp, "hwcaps lsov2: " "ip4 maxsz %u minsg %u encap 0x%x, " "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n", caps->ndis_lsov2.ndis_ip4_maxsz, caps->ndis_lsov2.ndis_ip4_minsg, caps->ndis_lsov2.ndis_ip4_encap, caps->ndis_lsov2.ndis_ip6_maxsz, caps->ndis_lsov2.ndis_ip6_minsg, caps->ndis_lsov2.ndis_ip6_encap, caps->ndis_lsov2.ndis_ip6_opts); } return (0); } int hn_rndis_attach(struct hn_softc *sc, int mtu) { int error; /* * Initialize RNDIS. 
*/ error = hn_rndis_init(sc); if (error) return (error); /* * Configure NDIS offload settings. - * XXX no offloading, if error happened? */ hn_rndis_conf_offload(sc, mtu); return (0); } void hn_rndis_detach(struct hn_softc *sc) { /* Halt the RNDIS. */ hn_rndis_halt(sc); } Index: projects/clang391-import/sys/dev/hyperv/netvsc/if_hn.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/netvsc/if_hn.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/netvsc/if_hn.c (revision 309263) @@ -1,5321 +1,5387 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmbus_if.h" #define HN_IFSTART_SUPPORT #define HN_RING_CNT_DEF_MAX 8 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_RNDIS_PKT_LEN \ (sizeof(struct rndis_packet_msg) + \ HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE /* -1 for RNDIS packet message */ #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 #define HN_PKTBUF_LEN_DEF (16 * 1024) #define HN_LROENT_CNT_DEF 128 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 #define HN_LOCK_INIT(sc) \ sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) #define HN_LOCK(sc) \ do { \ while (sx_try_xlock(&(sc)->hn_lock) == 0) \ DELAY(1000); \ } while (0) #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) #define HN_CSUM_IP_HWASSIST(sc) \ ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) #define HN_CSUM_IP6_HWASSIST(sc) \ ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) #define HN_PKTSIZE_MIN(align) \ roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ HN_RNDIS_PKT_LEN, (align)) #define HN_PKTSIZE(m, align) \ roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif STAILQ_ENTRY(hn_txdesc) agg_link; /* Aggregated txdescs, in sending order. */ STAILQ_HEAD(, hn_txdesc) agg_list; /* The oldest packet, if transmission aggregation happens. 
*/ struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ struct hn_nvs_sendctx send_ctx; uint32_t chim_index; int chim_size; bus_dmamap_t data_dmap; bus_addr_t rndis_pkt_paddr; struct rndis_packet_msg *rndis_pkt; bus_dmamap_t rndis_pkt_dmap; }; #define HN_TXD_FLAG_ONLIST 0x0001 #define HN_TXD_FLAG_DMAMAP 0x0002 #define HN_TXD_FLAG_ONAGG 0x0004 struct hn_rxinfo { uint32_t vlan_info; uint32_t csum_info; uint32_t hash_info; uint32_t hash_value; }; #define HN_RXINFO_VLAN 0x0001 #define HN_RXINFO_CSUM 0x0002 #define HN_RXINFO_HASHINF 0x0004 #define HN_RXINFO_HASHVAL 0x0008 #define HN_RXINFO_ALL \ (HN_RXINFO_VLAN | \ HN_RXINFO_CSUM | \ HN_RXINFO_HASHINF | \ HN_RXINFO_HASHVAL) #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff #define HN_NDIS_RXCSUM_INFO_INVALID 0 #define HN_NDIS_HASH_INFO_INVALID 0 static int hn_probe(device_t); static int hn_attach(device_t); static int hn_detach(device_t); static int hn_shutdown(device_t); static void hn_chan_callback(struct vmbus_channel *, void *); static void hn_init(void *); static int hn_ioctl(struct ifnet *, u_long, caddr_t); #ifdef HN_IFSTART_SUPPORT static void hn_start(struct ifnet *); #endif static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_ifmedia_upd(struct ifnet *); static void hn_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int hn_rndis_rxinfo(const void *, int, struct hn_rxinfo *); static void hn_rndis_rx_data(struct hn_rx_ring *, const void *, int); static void hn_rndis_rx_status(struct hn_softc *, const void *, int); static void hn_nvs_handle_notify(struct hn_softc *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_handle_comp(struct hn_softc *, struct vmbus_channel *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, struct vmbus_channel *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, struct vmbus_channel *, uint64_t); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); #if __FreeBSD_version < 1100095 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); #else static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); static void hn_stop(struct hn_softc *); static void hn_init_locked(struct hn_softc *); static int hn_chan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_chan_detach(struct hn_softc *, struct vmbus_channel *); static int hn_attach_subchans(struct hn_softc *); static void hn_detach_allchans(struct hn_softc *); static void hn_chan_rollup(struct hn_rx_ring *, struct hn_tx_ring *); static void 
hn_set_ring_inuse(struct hn_softc *, int); static int hn_synth_attach(struct hn_softc *, int); static void hn_synth_detach(struct hn_softc *); static int hn_synth_alloc_subchans(struct hn_softc *, int *); +static bool hn_synth_attachable(const struct hn_softc *); static void hn_suspend(struct hn_softc *); static void hn_suspend_data(struct hn_softc *); static void hn_suspend_mgmt(struct hn_softc *); static void hn_resume(struct hn_softc *); static void hn_resume_data(struct hn_softc *); static void hn_resume_mgmt(struct hn_softc *); static void hn_suspend_mgmt_taskfunc(void *, int); static void hn_chan_drain(struct hn_softc *, struct vmbus_channel *); static void hn_update_link_status(struct hn_softc *); static void hn_change_network(struct hn_softc *); static void hn_link_taskfunc(void *, int); static void hn_netchg_init_taskfunc(void *, int); static void hn_netchg_status_taskfunc(void *, int); static void hn_link_status(struct hn_softc *); static int hn_create_rx_data(struct hn_softc *, int); static void hn_destroy_rx_data(struct hn_softc *); static int hn_check_iplen(const struct mbuf *, int); static int hn_set_rxfilter(struct hn_softc *); static int hn_rss_reconfig(struct hn_softc *); -static void hn_rss_ind_fixup(struct hn_softc *, int); +static void hn_rss_ind_fixup(struct hn_softc *); static int hn_rxpkt(struct hn_rx_ring *, const void *, int, const struct hn_rxinfo *); static int hn_tx_ring_create(struct hn_softc *, int); static void hn_tx_ring_destroy(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_fixup_tx_data(struct hn_softc *); static void hn_destroy_tx_data(struct hn_softc *); static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); static void hn_txdesc_gc(struct hn_tx_ring *, struct hn_txdesc *); static int hn_encap(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *); static void hn_set_chim_size(struct hn_softc *, int); static void hn_set_tso_maxsize(struct hn_softc *, int, int); static bool hn_tx_ring_pending(struct hn_tx_ring *); static void hn_tx_ring_qflush(struct hn_tx_ring *); static void hn_resume_tx(struct hn_softc *, int); static void hn_set_txagg(struct hn_softc *); static void *hn_try_txagg(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *, int); static int hn_get_txswq_depth(const struct hn_tx_ring *); static void hn_txpkt_done(struct hn_nvs_sendctx *, struct hn_softc *, struct vmbus_channel *, const void *, int); static int hn_txpkt_sglist(struct hn_tx_ring *, struct hn_txdesc *); static int hn_txpkt_chim(struct hn_tx_ring *, struct hn_txdesc *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_txeof_taskfunc(void *, int); #ifdef HN_IFSTART_SUPPORT static int hn_start_locked(struct hn_tx_ring *, int); static void hn_start_taskfunc(void *, int); static void hn_start_txeof(struct hn_tx_ring *); static void hn_start_txeof_taskfunc(void *, int); #endif SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segment verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segment verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagrams verification on host side.
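 * Like the TCP knob above, this is a loader tunable (CTLFLAG_RDTUN);
 * e.g. hw.hn.trust_hostudp="0" in /boot/loader.conf (illustrative)
 * turns it off at boot.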
*/ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packets verification on host side. */ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); /* Limit TSO burst size */ static int hn_tso_maxlen = IP_MAXPACKET; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); /* # of LRO entries per RX ring */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif /* Use shared TX taskqueue */ static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); /* Bind TX taskqueue to the target CPU */ static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); #ifdef HN_IFSTART_SUPPORT /* Use ifnet.if_start instead of ifnet.if_transmit */ static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); #endif /* # of channels to use */ static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); /* # of transmit rings to use */ static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); /* Software TX ring depth */ static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif /* Packet transmission aggregation size limit */ static int hn_tx_agg_size = -1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); /* Packet transmission aggregation count limit */ -static int hn_tx_agg_pkts = 0; +static int hn_tx_agg_pkts = -1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); static u_int hn_cpu_index; /* next CPU for channel */ static struct taskqueue *hn_tx_taskq; /* shared TX taskqueue */ static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; static device_method_t hn_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hn_probe), DEVMETHOD(device_attach, hn_attach), DEVMETHOD(device_detach, hn_detach), DEVMETHOD(device_shutdown, hn_shutdown), DEVMETHOD_END }; static driver_t hn_driver = { "hn", hn_methods, sizeof(struct hn_softc) }; static devclass_t hn_devclass; DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && txd->chim_size == 0, ("invalid rndis sglist txd")); return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); } static int hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) { struct hn_nvs_rndis rndis; KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && txd->chim_size > 0, ("invalid rndis chim txd")); rndis.nvs_type = HN_NVS_TYPE_RNDIS; rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; rndis.nvs_chim_idx = txd->chim_index; rndis.nvs_chim_sz = txd->chim_size; return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, &rndis, sizeof(rndis), &txd->send_ctx)); } static __inline uint32_t hn_chim_alloc(struct hn_softc *sc) { int i, bmap_cnt = sc->hn_chim_bmap_cnt; u_long *bmap = sc->hn_chim_bmap; uint32_t ret = HN_NVS_CHIM_IDX_INVALID; for (i = 0; i < bmap_cnt; ++i) { int idx; idx = ffsl(~bmap[i]); if (idx == 0) continue; --idx; /* ffsl is 1-based */ KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, ("invalid i %d and idx %d", i, idx)); if (atomic_testandset_long(&bmap[i], idx)) continue; ret = i * LONG_BIT + idx; break; } return (ret); } static __inline void hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) { u_long mask; uint32_t idx; idx = chim_idx / LONG_BIT; KASSERT(idx < sc->hn_chim_bmap_cnt, ("invalid chimney index 0x%x", chim_idx)); mask = 1UL << (chim_idx % LONG_BIT); KASSERT(sc->hn_chim_bmap[idx] & mask, ("index bitmap 0x%lx, chimney index %u, " "bitmap idx %d, bitmask 0x%lx", sc->hn_chim_bmap[idx], chim_idx, idx, mask)); atomic_clear_long(&sc->hn_chim_bmap[idx], mask); } #if defined(INET6) || defined(INET) /* * NOTE: If this function failed, the m_head would be freed. 
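* (The fixup below pulls the Ethernet/IP/TCP headers into the first mbuf, * zeroes ip_len/ip_sum (or ip6_plen), and presets th_sum to the pseudo * header checksum; this appears to be the form the host's LSOv2 offload * expects.)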
*/ static __inline struct mbuf * hn_tso_fixup(struct mbuf *m_head) { struct ether_vlan_header *evl; struct tcphdr *th; int ehlen; KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); #define PULLUP_HDR(m, len) \ do { \ if (__predict_false((m)->m_len < (len))) { \ (m) = m_pullup((m), (len)); \ if ((m) == NULL) \ return (NULL); \ } \ } while (0) PULLUP_HDR(m_head, sizeof(*evl)); evl = mtod(m_head, struct ether_vlan_header *); if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ehlen = ETHER_HDR_LEN; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip; int iphlen; PULLUP_HDR(m_head, ehlen + sizeof(*ip)); ip = mtodo(m_head, ehlen); iphlen = ip->ip_hl << 2; PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); th = mtodo(m_head, ehlen + iphlen); ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6; PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); ip6 = mtodo(m_head, ehlen); if (ip6->ip6_nxt != IPPROTO_TCP) { m_freem(m_head); return (NULL); } PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); th = mtodo(m_head, ehlen + sizeof(*ip6)); ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif return (m_head); #undef PULLUP_HDR } #endif /* INET6 || INET */ static int hn_set_rxfilter(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; uint32_t filter; int error = 0; HN_LOCK_ASSERT(sc); if (ifp->if_flags & IFF_PROMISC) { filter = NDIS_PACKET_TYPE_PROMISCUOUS; } else { filter = NDIS_PACKET_TYPE_DIRECTED; if (ifp->if_flags & IFF_BROADCAST) filter |= NDIS_PACKET_TYPE_BROADCAST; /* TODO: support multicast list */ if ((ifp->if_flags & IFF_ALLMULTI) || !TAILQ_EMPTY(&ifp->if_multiaddrs)) filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; } if (sc->hn_rx_filter != filter) { error = hn_rndis_set_rxfilter(sc, filter); if (!error) sc->hn_rx_filter = filter; } return (error); } static void hn_set_txagg(struct hn_softc *sc) { uint32_t size, pkts; int i; /* * Setup aggregation size. */ if (sc->hn_agg_size < 0) size = UINT32_MAX; else size = sc->hn_agg_size; if (sc->hn_rndis_agg_size < size) size = sc->hn_rndis_agg_size; + /* NOTE: We only aggregate packets using chimney sending buffers. */ + if (size > (uint32_t)sc->hn_chim_szmax) + size = sc->hn_chim_szmax; + if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { /* Disable */ size = 0; pkts = 0; goto done; } /* NOTE: Type of the per TX ring setting is 'int'. */ if (size > INT_MAX) size = INT_MAX; - /* NOTE: We only aggregate packets using chimney sending buffers. */ - if (size > (uint32_t)sc->hn_chim_szmax) - size = sc->hn_chim_szmax; - /* * Setup aggregation packet count. */ if (sc->hn_agg_pkts < 0) pkts = UINT32_MAX; else pkts = sc->hn_agg_pkts; if (sc->hn_rndis_agg_pkts < pkts) pkts = sc->hn_rndis_agg_pkts; if (pkts <= 1) { /* Disable */ size = 0; pkts = 0; goto done; } /* NOTE: Type of the per TX ring setting is 'short'. */ if (pkts > SHRT_MAX) pkts = SHRT_MAX; done: /* NOTE: Type of the per TX ring setting is 'short'. 
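* Similarly, if the RNDIS aggregation alignment does not fit the per TX * ring 'short' field, aggregation is disabled entirely below.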
*/ if (sc->hn_rndis_agg_align > SHRT_MAX) { /* Disable */ size = 0; pkts = 0; } if (bootverbose) { if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", size, pkts, sc->hn_rndis_agg_align); } for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_agg_szmax = size; txr->hn_agg_pktmax = pkts; txr->hn_agg_align = sc->hn_rndis_agg_align; mtx_unlock(&txr->hn_tx_lock); } } static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_rss_reconfig(struct hn_softc *sc) { int error; HN_LOCK_ASSERT(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) return (ENXIO); /* * Disable RSS first. * * NOTE: * Direct reconfiguration by setting the UNCHG flags does * _not_ work properly. */ if (bootverbose) if_printf(sc->hn_ifp, "disable RSS\n"); error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); if (error) { if_printf(sc->hn_ifp, "RSS disable failed\n"); return (error); } /* * Reenable the RSS w/ the updated RSS key or indirect * table. */ if (bootverbose) if_printf(sc->hn_ifp, "reconfig RSS\n"); error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); if (error) { if_printf(sc->hn_ifp, "RSS reconfig failed\n"); return (error); } return (0); } static void -hn_rss_ind_fixup(struct hn_softc *sc, int nchan) +hn_rss_ind_fixup(struct hn_softc *sc) { struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; - int i; + int i, nchan; + nchan = sc->hn_rx_ring_inuse; KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); /* * Check indirect table to make sure that all channels in it * can be used. */ for (i = 0; i < NDIS_HASH_INDCNT; ++i) { if (rss->rss_ind[i] >= nchan) { if_printf(sc->hn_ifp, "RSS indirect table %d fixup: %u -> %d\n", i, rss->rss_ind[i], nchan - 1); rss->rss_ind[i] = nchan - 1; } } } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const struct hyperv_guid g_net_vsc_device_type = { .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; static int hn_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_net_vsc_device_type) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hn_attach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; uint8_t eaddr[ETHER_ADDR_LEN]; struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); HN_LOCK_INIT(sc); /* * Initialize these tunables once. */ sc->hn_agg_size = hn_tx_agg_size; sc->hn_agg_pkts = hn_tx_agg_pkts; /* * Setup taskqueue for transmission. 
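* If no shared TX taskqueue has been set up (hn_tx_taskq is NULL), a * per-device taskqueue is created; the hw.hn.bind_tx_taskq tunable * optionally pins its thread to a CPU.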
*/ if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } /* * Setup taskqueue for management tasks, e.g. link status. */ sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", device_get_nameunit(dev)); TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, hn_netchg_status_taskfunc, sc); /* * Allocate ifnet and setup its name earlier, so that if_printf * can be used by functions that will be called after * ether_ifattach(). */ ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Initialize ifmedia earlier so that it can be unconditionally * destroyed, if an error happens later on. */ ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is the same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } #endif /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; /* * Create enough TX/RX rings, even if only a limited number of * channels can be allocated. */ error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; error = hn_create_rx_data(sc, ring_cnt); if (error) goto failed; /* * Create transaction context for NVS and RNDIS transactions. */ sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); if (sc->hn_xact == NULL) { error = ENXIO; goto failed; } /* * Install orphan handler for the revocation of this device's * primary channel. * * NOTE: * The processing order is critical here: * Install the orphan handler, _before_ testing whether this * device's primary channel has been revoked or not. */ vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); if (vmbus_chan_is_revoked(sc->hn_prichan)) { error = ENXIO; goto failed; } /* * Attach the synthetic parts, i.e. NVS and RNDIS. */ error = hn_synth_attach(sc, ETHERMTU); if (error) goto failed; error = hn_rndis_get_eaddr(sc, eaddr); if (error) goto failed; #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif /* * Fixup TX stuff after the synthetic parts are attached.
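* (e.g. the chimney sending buffer size and the checksum/TSO assistance * settings are presumably only known once NVS/RNDIS negotiation is done).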
*/ hn_fixup_tx_data(sc); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, &sc->hn_nvs_ver, 0, "NVS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_ndis_version_sysctl, "A", "NDIS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_caps_sysctl, "A", "capabilities"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_hwassist_sysctl, "A", "hwassist"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rxfilter_sysctl, "A", "rxfilter"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rss_hash_sysctl, "A", "RSS hash"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_rss_key_sysctl, "IU", "RSS key"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_rss_ind_sysctl, "IU", "RSS indirect table"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, "RNDIS offered packet transmission aggregation size limit"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, "RNDIS offered packet transmission aggregation count limit"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, "RNDIS packet transmission aggregation alignment"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_txagg_size_sysctl, "I", "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_txagg_pkts_sysctl, "I", "Packet transmission aggregation packets, " "0 -- disable, -1 -- auto"); /* * Setup the ifmedia, which has been initialized earlier. */ ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Setup the ifnet for this interface. */ ifp->if_baudrate = IF_Gbps(10); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_init; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else #endif { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; #endif if (sc->hn_caps & HN_CAP_VLAN) { /* XXX not sure about VLAN_MTU. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; } ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; if (ifp->if_hwassist & HN_CSUM_IP_MASK) ifp->if_capabilities |= IFCAP_TXCSUM; if (ifp->if_hwassist & HN_CSUM_IP6_MASK) ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; if (sc->hn_caps & HN_CAP_TSO4) { ifp->if_capabilities |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_IP_TSO; } if (sc->hn_caps & HN_CAP_TSO6) { ifp->if_capabilities |= IFCAP_TSO6; ifp->if_hwassist |= CSUM_IP6_TSO; } /* Enable all available capabilities by default. */ ifp->if_capenable = ifp->if_capabilities; /* * Disable IPv6 TSO and TXCSUM by default; they can still * be enabled through SIOCSIFCAP. */ ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; } ether_ifattach(ifp, eaddr); if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { if_printf(ifp, "TSO segcnt %u segsz %u\n", ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } /* Inform the upper layer about the long frame support. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* * Kick off link status check. */ sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; hn_update_link_status(sc); return (0); failed: if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) hn_synth_detach(sc); hn_detach(dev); return (error); } static int hn_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->hn_ifp; if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { /* * In case the vmbus missed the orphan handler * installation. */ vmbus_xact_ctx_orphan(sc->hn_xact); } if (device_is_attached(dev)) { HN_LOCK(sc); if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) hn_stop(sc); /* * NOTE: * hn_stop() only suspends data, so management * stuff has to be suspended manually here. */ hn_suspend_mgmt(sc); hn_synth_detach(sc); } HN_UNLOCK(sc); ether_ifdetach(ifp); } ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); taskqueue_free(sc->hn_mgmt_taskq0); if (sc->hn_xact != NULL) { /* * Uninstall the orphan handler _before_ the xact is * destructed. */ vmbus_chan_unset_orphan(sc->hn_prichan); vmbus_xact_ctx_destroy(sc->hn_xact); } if_free(ifp); HN_LOCK_DESTROY(sc); return (0); } static int hn_shutdown(device_t dev) { return (0); } static void hn_link_status(struct hn_softc *sc) { uint32_t link_status; int error; error = hn_rndis_get_linkstatus(sc, &link_status); if (error) { /* XXX what to do? */ return; } if (link_status == NDIS_MEDIA_STATE_CONNECTED) sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; else sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; if_link_state_change(sc->hn_ifp, (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? LINK_STATE_UP : LINK_STATE_DOWN); } static void hn_link_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) return; hn_link_status(sc); } static void hn_netchg_init_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; /* Prevent any link status checks from running. */ sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; /* * Fake up a [link down --> link up] state change; a 5 second * delay is used, which closely simulates the miibus reaction * to a link down event.
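* While HN_LINK_FLAG_NETCHG is set, hn_link_taskfunc() bails out early, * so no regular link status check can race with this fake transition; * the flag is cleared again by hn_netchg_status_taskfunc().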
*/ sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 5 * hz); } static void hn_netchg_status_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; /* Re-allow link status checks. */ sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; hn_link_status(sc); } static void hn_update_link_status(struct hn_softc *sc) { if (sc->hn_mgmt_taskq != NULL) taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); } static void hn_change_network(struct hn_softc *sc) { if (sc->hn_mgmt_taskq != NULL) taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("put an onagg txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; if (!STAILQ_EMPTY(&txd->agg_list)) { struct hn_txdesc *tmp_txd; while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { int freed; KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), ("recursive aggregation on aggregated txdesc")); KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), ("not aggregated txdesc")); KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("aggregated txdesc uses dmamap")); KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("aggregated txdesc consumes " "chimney sending buffer")); KASSERT(tmp_txd->chim_size == 0, ("aggregated txdesc has non-zero " "chimney sending size")); STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; freed = hn_txdesc_put(txr, tmp_txd); KASSERT(freed, ("failed to free aggregated txdesc")); } } if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("chim txd uses dmamap")); hn_chim_free(txr->hn_sc, txd->chim_index); txd->chim_index = HN_NVS_CHIM_IDX_INVALID; txd->chim_size = 0; } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br,
txd); #endif return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && STAILQ_EMPTY(&txd->agg_list) && txd->chim_index == HN_NVS_CHIM_IDX_INVALID && txd->chim_size == 0 && (txd->flags & HN_TXD_FLAG_ONLIST) && (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) { KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("recursive aggregation on aggregating txdesc")); KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("already aggregated")); KASSERT(STAILQ_EMPTY(&txd->agg_list), ("recursive aggregation on to-be-aggregated txdesc")); txd->flags |= HN_TXD_FLAG_ONAGG; STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); } static bool hn_tx_ring_pending(struct hn_tx_ring *txr) { bool pending = false; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) pending = true; mtx_unlock_spin(&txr->hn_txlist_spin); #else if (!buf_ring_full(txr->hn_txdesc_br)) pending = true; #endif return (pending); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data __unused, int dlen __unused) { struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", - vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); + vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } static void hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. 
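* (The if_start path uses only one TX ring, while every channel owns an * RX ring, so RX rings beyond the first have no TX ring attached.)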
*/ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } static __inline uint32_t hn_rndis_pktmsg_offset(uint32_t ofs) { KASSERT(ofs >= sizeof(struct rndis_packet_msg), ("invalid RNDIS packet msg offset %u", ofs)); return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); } static __inline void * hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, size_t pi_dlen, uint32_t pi_type) { const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); struct rndis_pktinfo *pi; KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); /* * Per-packet-info does not move; it only grows. * * NOTE: * rm_pktinfooffset in this phase counts from the beginning * of rndis_packet_msg. */ KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, ("%u pktinfo overflows RNDIS packet msg", pi_type)); pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen); pkt->rm_pktinfolen += pi_size; pi->rm_size = pi_size; pi->rm_type = pi_type; pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; /* Data immediately follow per-packet-info. */ pkt->rm_dataoffset += pi_size; /* Update RNDIS packet msg length */ pkt->rm_len += pi_size; return (pi->rm_data); } static __inline int hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) { struct hn_txdesc *txd; struct mbuf *m; int error, pkts; txd = txr->hn_agg_txd; KASSERT(txd != NULL, ("no aggregate txdesc")); /* * Since hn_txpkt() will reset this temporary stat, save * it now, so that oerrors can be updated properly, if * hn_txpkt() ever fails. */ pkts = txr->hn_stat_pkts; /* * Since txd's mbuf will _not_ be freed upon hn_txpkt() * failure, save it for later freeing, if hn_txpkt() ever * fails. */ m = txd->m; error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m is not. */ m_freem(m); txr->hn_flush_failed++; if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); } /* Reset all aggregation states. */ txr->hn_agg_txd = NULL; txr->hn_agg_szleft = 0; txr->hn_agg_pktleft = 0; txr->hn_agg_prevpkt = NULL; return (error); } static void * hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, int pktsize) { void *chim; if (txr->hn_agg_txd != NULL) { if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { struct hn_txdesc *agg_txd = txr->hn_agg_txd; struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; int olen; /* * Update the previous RNDIS packet's total length, * it can be increased due to the mandatory alignment * padding for this RNDIS packet. And update the * aggregating txdesc's chimney sending buffer size * accordingly. * * XXX * Zero-out the padding, as required by the RNDIS spec. */ olen = pkt->rm_len; pkt->rm_len = roundup2(olen, txr->hn_agg_align); agg_txd->chim_size += pkt->rm_len - olen; /* Link this txdesc to the parent. */ hn_txdesc_agg(agg_txd, txd); chim = (uint8_t *)pkt + pkt->rm_len; /* Save the current packet for later fixup. */ txr->hn_agg_prevpkt = chim; txr->hn_agg_pktleft--; txr->hn_agg_szleft -= pktsize; if (txr->hn_agg_szleft <= HN_PKTSIZE_MIN(txr->hn_agg_align)) { /* * Probably can't aggregate more packets, * flush this aggregating txdesc proactively. */ txr->hn_agg_pktleft = 0; } /* Done! 
*/ return (chim); } hn_flush_txagg(ifp, txr); } KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); txr->hn_tx_chimney_tried++; txd->chim_index = hn_chim_alloc(txr->hn_sc); if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) return (NULL); txr->hn_tx_chimney++; chim = txr->hn_sc->hn_chim + (txd->chim_index * txr->hn_sc->hn_chim_szmax); if (txr->hn_agg_pktmax > 1 && txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { txr->hn_agg_txd = txd; txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; txr->hn_agg_prevpkt = chim; } return (chim); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; struct rndis_packet_msg *pkt; uint32_t *pi_data; void *chim = NULL; int pkt_hlen, pkt_size; pkt = txd->rndis_pkt; pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); if (pkt_size < txr->hn_chim_size) { chim = hn_try_txagg(ifp, txr, txd, pkt_size); if (chim != NULL) pkt = chim; } else { if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); } pkt->rm_type = REMOTE_NDIS_PACKET_MSG; pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len; pkt->rm_dataoffset = sizeof(*pkt); pkt->rm_datalen = m_head->m_pkthdr.len; pkt->rm_oobdataoffset = 0; pkt->rm_oobdatalen = 0; pkt->rm_oobdataelements = 0; pkt->rm_pktinfooffset = sizeof(*pkt); pkt->rm_pktinfolen = 0; pkt->rm_vchandle = 0; pkt->rm_reserved = 0; if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. 
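* (The ring's own index, hn_tx_idx, serves as the hash value below.)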
*/ pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); *pi_data = txr->hn_tx_idx; } if (m_head->m_flags & M_VLANTAG) { pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); *pi_data = NDIS_VLAN_INFO_MAKE( EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { #if defined(INET6) || defined(INET) pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, m_head->m_pkthdr.tso_segsz); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, m_head->m_pkthdr.tso_segsz); } #endif #endif /* INET6 || INET */ } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); if (m_head->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP)) { *pi_data = NDIS_TXCSUM_INFO_IPV6; } else { *pi_data = NDIS_TXCSUM_INFO_IPV4; if (m_head->m_pkthdr.csum_flags & CSUM_IP) *pi_data |= NDIS_TXCSUM_INFO_IPCS; } if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) *pi_data |= NDIS_TXCSUM_INFO_TCPCS; else if (m_head->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) *pi_data |= NDIS_TXCSUM_INFO_UDPCS; } pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; /* Convert RNDIS packet message offsets */ pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset); pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); /* * Fast path: Chimney sending. */ if (chim != NULL) { struct hn_txdesc *tgt_txd = txd; if (txr->hn_agg_txd != NULL) { tgt_txd = txr->hn_agg_txd; #ifdef INVARIANTS *m_head0 = NULL; #endif } KASSERT(pkt == chim, ("RNDIS pkt not in chimney sending buffer")); KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, ("chimney sending buffer is not used")); tgt_txd->chim_size += pkt->rm_len; m_copydata(m_head, 0, m_head->m_pkthdr.len, ((uint8_t *)chim) + pkt_hlen); txr->hn_gpa_cnt = 0; txr->hn_sendpkt = hn_txpkt_chim; goto done; } KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("chimney buffer is used")); KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (__predict_false(error)) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; /* +1 RNDIS packet message */ txr->hn_gpa_cnt = nsegs + 1; /* send packet with page buffer */ txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; txr->hn_gpa[0].gpa_len = pkt_hlen; /* * Fill the page buffers with mbuf info after the page * buffer for RNDIS packet message. 
*/ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } txd->chim_index = HN_NVS_CHIM_IDX_INVALID; txd->chim_size = 0; txr->hn_sendpkt = hn_txpkt_sglist; done: txd->m = m_head; /* Set the completion routine */ hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); /* Update temporary stats for later use. */ txr->hn_stat_pkts++; txr->hn_stat_size += m_head->m_pkthdr.len; if (m_head->m_flags & M_MCAST) txr->hn_stat_mcasts++; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that this txd and any aggregated txds are not freed * before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = txr->hn_sendpkt(txr, txd); if (!error) { if (bpf_peers_present(ifp->if_bpf)) { const struct hn_txdesc *tmp_txd; ETHER_BPF_MTAP(ifp, txd->m); STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) ETHER_BPF_MTAP(ifp, tmp_txd->m); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { if_inc_counter(ifp, IFCOUNTER_OBYTES, txr->hn_stat_size); if (txr->hn_stat_mcasts != 0) { if_inc_counter(ifp, IFCOUNTER_OMCASTS, txr->hn_stat_mcasts); } } txr->hn_pkts += txr->hn_stat_pkts; txr->hn_sends++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after setting hn_has_txeof, * in case we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. */ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } /* Reset temporary stats, after this sending is done. */ txr->hn_stat_size = 0; txr->hn_stat_pkts = 0; txr->hn_stat_mcasts = 0; return (error); } /* * Append the specified data to the indicated mbuf chain. * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead.
*/ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif static int hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_rxinfo *info) { struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; int hash_type; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (dlen <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), data, dlen); m_new->m_pkthdr.len = m_new->m_len = dlen; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (dlen > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, dlen, data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { /* IP csum offload */ if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } /* * XXX * As of this writing (Oct 28th, 2016), host side will turn * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so * the do_lro setting here is actually _not_ accurate. We * depend on the RSS hash type check to reset do_lro.
*/ if ((info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (pr == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( NDIS_VLAN_INFO_ID(info->vlan_info), NDIS_VLAN_INFO_PRI(info->vlan_info), NDIS_VLAN_INFO_CFI(info->vlan_info)); m_new->m_flags |= M_VLANTAG; } if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = info->hash_value; hash_type = M_HASHTYPE_OPAQUE_HASH; if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); /* * NOTE: * do_lro is reset if the hash types are not TCP * related. See the comment in the above csum_flags * setup section. */ switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; do_lro = 0; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; do_lro = 0; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; do_lro = 0; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE!
*/ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct hn_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int mask, error = 0; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu > HN_MTU_MAX) { error = EINVAL; break; } HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if ((sc->hn_caps & HN_CAP_MTU) == 0) { /* Can't change MTU */ HN_UNLOCK(sc); error = EOPNOTSUPP; break; } if (ifp->if_mtu == ifr->ifr_mtu) { HN_UNLOCK(sc); break; } /* * Suspend this interface before the synthetic parts * are ripped. */ hn_suspend(sc); /* * Detach the synthetic parts, i.e. NVS and RNDIS. */ hn_synth_detach(sc); /* * Reattach the synthetic parts, i.e. NVS and RNDIS, * with the new MTU setting. */ error = hn_synth_attach(sc, ifr->ifr_mtu); if (error) { HN_UNLOCK(sc); break; } /* * Commit the requested MTU, after the synthetic parts * have been successfully attached. */ ifp->if_mtu = ifr->ifr_mtu; /* * Make sure that various parameters based on MTU are * still valid, after the MTU change. */ if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) hn_set_chim_size(sc, sc->hn_chim_szmax); hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); #endif /* * All done! Resume the interface now. */ hn_resume(sc); HN_UNLOCK(sc); break; case SIOCSIFFLAGS: HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Caller might hold a mutex, e.g. * bpf; use busy-wait for the RNDIS * reply. */ HN_NO_SLEEPING(sc); hn_set_rxfilter(sc); HN_SLEEPING_OK(sc); } else { hn_init_locked(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) hn_stop(sc); } sc->hn_if_flags = ifp->if_flags; HN_UNLOCK(sc); break; case SIOCSIFCAP: HN_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); else ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); else ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); } /* TODO: flip RNDIS offload parameters for RXCSUM. */ if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; #endif if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } HN_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Multicast uses mutex; use busy-wait for * the RNDIS reply.
*/ HN_NO_SLEEPING(sc); hn_set_rxfilter(sc); HN_SLEEPING_OK(sc); } HN_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void hn_stop(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; int i; HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); /* Clear RUNNING bit _before_ hn_suspend_data() */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); hn_suspend_data(sc); /* Clear OACTIVE bit. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; } static void hn_init_locked(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; int i; HN_LOCK_ASSERT(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) return; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* Configure RX filter */ hn_set_rxfilter(sc); /* Clear OACTIVE bit. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; /* Clear TX 'suspended' bit. */ hn_resume_tx(sc, sc->hn_tx_ring_inuse); /* Everything is ready; unleash! */ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); } static void hn_init(void *xsc) { struct hn_softc *sc = xsc; HN_LOCK(sc); hn_init_locked(sc); HN_UNLOCK(sc); } #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) { HN_UNLOCK(sc); return EINVAL; } hn_set_lro_lenlim(sc, lenlim); HN_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. 
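* For example, a user-visible aggregation limit of 3 ACKs corresponds * to an lro_ackcnt_lim (append count) of 2.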
*/ --ackcnt; HN_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; HN_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } HN_UNLOCK(sc); return 0; } static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chim_size, error; chim_size = sc->hn_tx_ring[0].hn_chim_size; error = sysctl_handle_int(oidp, &chim_size, 0, req); if (error || req->newptr == NULL) return error; if (chim_size > sc->hn_chim_szmax || chim_size <= 0) return EINVAL; HN_LOCK(sc); hn_set_chim_size(sc, chim_size); HN_UNLOCK(sc); return 0; } #if __FreeBSD_version < 1100095 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((int *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((int *)((uint8_t *)rxr + ofs)) = 0; } return 0; } #else static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } #endif static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
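* (Only on a sysctl write; plain reads return above without clearing.)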
*/ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } HN_UNLOCK(sc); return 0; } static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, size; size = sc->hn_agg_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); sc->hn_agg_size = size; hn_set_txagg(sc); HN_UNLOCK(sc); return (0); } static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, pkts; pkts = sc->hn_agg_pkts; error = sysctl_handle_int(oidp, &pkts, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); sc->hn_agg_pkts = pkts; hn_set_txagg(sc); HN_UNLOCK(sc); return (0); } static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int pkts; pkts = sc->hn_tx_ring[0].hn_agg_pktmax; return (sysctl_handle_int(oidp, &pkts, 0, req)); } static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int align; align = sc->hn_tx_ring[0].hn_agg_align; return (sysctl_handle_int(oidp, &align, 0, req)); } static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char caps_str[128]; uint32_t caps; HN_LOCK(sc); caps = sc->hn_caps; HN_UNLOCK(sc); snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); } static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char assist_str[128]; uint32_t hwassist; HN_LOCK(sc); hwassist = sc->hn_ifp->if_hwassist; HN_UNLOCK(sc); snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); } static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char filter_str[128]; uint32_t filter; HN_LOCK(sc); filter = sc->hn_rx_filter; HN_UNLOCK(sc); snprintf(filter_str, sizeof(filter_str), "%b", filter, NDIS_PACKET_TYPES); return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); } static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error; HN_LOCK(sc); error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); if (error || req->newptr == NULL) goto back; error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); if (error) goto back; sc->hn_flags |= HN_FLAG_HAS_RSSKEY; if (sc->hn_rx_ring_inuse > 1) { error = hn_rss_reconfig(sc); } else { /* Not RSS capable, at least for now; just save the RSS key. 
*/ error = 0; } back: HN_UNLOCK(sc); return (error); } static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error; HN_LOCK(sc); error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); if (error || req->newptr == NULL) goto back; /* * Don't allow RSS indirect table change, if this interface is not * RSS capable currently. */ if (sc->hn_rx_ring_inuse == 1) { error = EOPNOTSUPP; goto back; } error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); if (error) goto back; sc->hn_flags |= HN_FLAG_HAS_RSSIND; - hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse); + hn_rss_ind_fixup(sc); error = hn_rss_reconfig(sc); back: HN_UNLOCK(sc); return (error); } static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char hash_str[128]; uint32_t hash; HN_LOCK(sc); hash = sc->hn_rss_hash; HN_UNLOCK(sc); snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is at * least as much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static int hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; /* * Create RXBUF for reception. * * NOTE: * - It is shared by all channels. * - A large enough buffer is allocated, certain versions of NVS * may further limit the usable space.
*/ sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_rxbuf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); return (ENOMEM); } sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_DEVBUF, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; if (bootverbose) device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, &rxr->hn_br_dma, BUS_DMA_WAITOK); if (rxr->hn_br == NULL) { device_printf(dev, "allocate bufring failed\n"); return (ENOMEM); } if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); rxr->hn_rx_idx = i; rxr->hn_rxbuf = sc->hn_rxbuf; /* * Initialize LRO. */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "pktbuf_len", CTLFLAG_RD, &rxr->hn_pktbuf_len, 0, "Temporary channel packet buffer length"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), #if __FreeBSD_version < 1100095 hn_rx_stat_int_sysctl, #else hn_rx_stat_u64_sysctl, #endif "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), #if __FreeBSD_version < 1100095 hn_rx_stat_int_sysctl, #else hn_rx_stat_u64_sysctl, #endif "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), 
hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segment verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_ack_failed), hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); return (0); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rxbuf != NULL) { - hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); + if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) + hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); + else + device_printf(sc->hn_dev, "RXBUF is referenced\n"); sc->hn_rxbuf = NULL; } if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (rxr->hn_br == NULL) continue; - hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); + if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { + hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); + } else { + device_printf(sc->hn_dev, + "%dth channel bufring is referenced\n", i); + } rxr->hn_br = NULL; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_pktbuf, M_DEVBUF); } free(sc->hn_rx_ring, M_DEVBUF); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; }
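The guarded frees above are the heart of this revision: DMA memory that the hypervisor may still reference is deliberately leaked rather than freed. A minimal user-space sketch of the same pattern follows; the types and names are hypothetical stand-ins, not part of the driver:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the driver's per-buffer DMA bookkeeping. */
struct guarded_buf {
	void	*mem;
	bool	 host_ref;	/* does the host still reference this memory? */
};

/*
 * Free the buffer only when the host no longer references it.
 * Freeing memory the host could still DMA into would corrupt
 * whatever reuses those pages, so a deliberate leak (plus a log
 * message) is the lesser evil.
 */
static void
guarded_buf_destroy(struct guarded_buf *gb, const char *name)
{
	if (gb->mem == NULL)
		return;
	if (!gb->host_ref)
		free(gb->mem);
	else
		fprintf(stderr, "%s is referenced; leaking\n", name);
	gb->mem = NULL;
}

int
main(void)
{
	struct guarded_buf rxbuf = { malloc(4096), false };

	guarded_buf_destroy(&rxbuf, "rxbuf");	/* host_ref is false: freed */
	return (0);
}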
static int hn_tx_ring_create(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_DEVBUF, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else #endif { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS packet messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_PKT_ALIGN, /* alignment */ HN_RNDIS_PKT_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_PKT_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_PKT_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; txd->chim_index = HN_NVS_CHIM_IDX_INVALID; STAILQ_INIT(&txd->agg_list); /* * Allocate and load RNDIS packet message. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_pkt, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &txd->rndis_pkt_dmap); if (error) { device_printf(dev, "failed to allocate rndis_packet_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap, txd->rndis_pkt, HN_RNDIS_PKT_LEN, hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_packet_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* DMA map for TX data. 
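 * (One such map per txdesc; it is created right below and released * in hn_txdesc_dmamap_destroy().)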
*/ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", CTLFLAG_RW, &txr->hn_sends, "# of sends"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT(txd->refs == 0 || txd->refs == 1, ("invalid txd refs %d", txd->refs)); /* Aggregated txds will be freed by their aggregating txd. */ if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { int freed; freed = hn_txdesc_put(txr, txd); KASSERT(freed, ("can't free txdesc")); } } static void hn_tx_ring_destroy(struct hn_tx_ring *txr) { int i; if (txr->hn_txdesc == NULL) return; /* * NOTE: * Because the freeing of aggregated txds will be deferred * to the aggregating txd, two passes are used here: * - The first pass GCes any pending txds. This GC is necessary, * since if the channels are revoked, hypervisor will not * deliver send-done for all pending txds. * - The second pass frees the busdma stuffs, i.e. after all txds * were freed. 
*/ for (i = 0; i < txr->hn_txdesc_cnt; ++i) hn_txdesc_gc(txr, &txr->hn_txdesc[i]); for (i = 0; i < txr->hn_txdesc_cnt; ++i) hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); #endif free(txr->hn_txdesc, M_DEVBUF); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; /* * Create TXBUF for chimney sending. * * NOTE: It is shared by all channels. */ sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_chim == NULL) { device_printf(sc->hn_dev, "allocate txbuf failed\n"); return (ENOMEM); } sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_DEVBUF, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_tx_ring_create(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_flush_failed), hn_tx_stat_ulong_sysctl, "LU", "# of packet transmission aggregation flush failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_chim_szmax, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, "Applied packet transmission aggregation size"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_txagg_pktmax_sysctl, "I", "Applied packet transmission aggregation packets"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_txagg_align_sysctl, "I", "Applied packet transmission aggregation alignment"); return 0; } static void hn_set_chim_size(struct hn_softc *sc, int chim_size) { int i; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_chim_size = chim_size; } static void hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) { struct ifnet *ifp = sc->hn_ifp; int tso_minlen; if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) return; KASSERT(sc->hn_ndis_tso_sgmin >= 2, ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); tso_minlen = sc->hn_ndis_tso_sgmin * mtu; KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && sc->hn_ndis_tso_szmax <= IP_MAXPACKET, ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); if (tso_maxlen < tso_minlen) tso_maxlen = tso_minlen; else if (tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; if (tso_maxlen > sc->hn_ndis_tso_szmax) tso_maxlen = sc->hn_ndis_tso_szmax; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); if (bootverbose) if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); } static void hn_fixup_tx_data(struct hn_softc *sc) { uint64_t csum_assist; int i; hn_set_chim_size(sc, sc->hn_chim_szmax); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_chim_szmax) hn_set_chim_size(sc, hn_tx_chimney_size); csum_assist = 0; if (sc->hn_caps & HN_CAP_IPCS) csum_assist |= CSUM_IP; if (sc->hn_caps & HN_CAP_TCP4CS) csum_assist |= CSUM_IP_TCP; if (sc->hn_caps & HN_CAP_UDP4CS) csum_assist |= CSUM_IP_UDP; if (sc->hn_caps & HN_CAP_TCP6CS) csum_assist |= CSUM_IP6_TCP; if (sc->hn_caps & HN_CAP_UDP6CS) csum_assist |= CSUM_IP6_UDP; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_csum_assist = csum_assist; if (sc->hn_caps & HN_CAP_HASHVAL) { /* * Support HASHVAL pktinfo on TX path. 
*/ if (bootverbose) if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; } } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_chim != NULL) { - hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); + if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { + hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); + } else { + device_printf(sc->hn_dev, + "chimney sending buffer is referenced\n"); + } sc->hn_chim = NULL; } if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_tx_ring_destroy(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_DEVBUF); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } #ifdef HN_IFSTART_SUPPORT static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; int sched = 0; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); if (__predict_false(txr->hn_suspended)) return (0); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (0); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and the sending of any * follow-up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); sched = 1; break; } #if defined(INET6) || defined(INET) if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { m_head = hn_tso_fixup(m_head); if (__predict_false(m_head == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); continue; } } #endif txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(ifp, txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ KASSERT(txr->hn_agg_txd == NULL, ("encap failed w/ pending aggregating txdesc")); continue; } if (txr->hn_agg_pktleft == 0) { if (txr->hn_agg_txd != NULL) { KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); error = hn_flush_txagg(ifp, txr); if (__predict_false(error)) { atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } else { KASSERT(m_head != NULL, ("mbuf was freed")); error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } } #ifdef INVARIANTS else { KASSERT(txr->hn_agg_txd != NULL, ("no aggregating txdesc")); KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); } #endif } /* Flush pending aggregated transmission.
*/ if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); return (sched); } static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } #endif /* HN_IFSTART_SUPPORT */ static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; int sched = 0; mtx_assert(&txr->hn_tx_lock, MA_OWNED); #ifdef HN_IFSTART_SUPPORT KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); #endif KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); if (__predict_false(txr->hn_suspended)) return (0); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return (0); while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and the sending of any * follow-up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); sched = 1; break; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(ifp, txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ KASSERT(txr->hn_agg_txd == NULL, ("encap failed w/ pending aggregating txdesc")); drbr_advance(ifp, txr->hn_mbuf_br); continue; } if (txr->hn_agg_pktleft == 0) { if (txr->hn_agg_txd != NULL) { KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); error = hn_flush_txagg(ifp, txr); if (__predict_false(error)) { txr->hn_oactive = 1; break; } } else { KASSERT(m_head != NULL, ("mbuf was freed")); error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } } } #ifdef INVARIANTS else { KASSERT(txr->hn_agg_txd != NULL, ("no aggregating txdesc")); KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); } #endif /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } /* Flush pending aggregated transmission.
*/ if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); return (sched); } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; #if defined(INET6) || defined(INET) /* * Perform TSO packet header fixup now, since the TSO * packet header should be cache-hot. */ if (m->m_pkthdr.csum_flags & CSUM_TSO) { m = hn_tso_fixup(m); if (__predict_false(m == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return EIO; } } #endif /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_tx_ring_qflush(struct hn_tx_ring *txr) { struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static int hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct vmbus_chan_br cbr; struct hn_rx_ring *rxr; struct hn_tx_ring *txr = NULL; int idx, error; idx = vmbus_chan_subidx(chan); /* * Link this channel to RX/TX ring. */ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", idx, vmbus_chan_id(chan)); } } /* Bind this channel to a proper CPU. 
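 * (The modulo below spreads the channels round-robin across the * online CPUs, starting from sc->hn_cpu.)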
*/ vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); /* * Open this channel */ cbr.cbr = rxr->hn_br; cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; cbr.cbr_txsz = HN_TXBR_SIZE; cbr.cbr_rxsz = HN_RXBR_SIZE; error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); if (error) { - if_printf(sc->hn_ifp, "open chan%u failed: %d\n", - vmbus_chan_id(chan), error); - rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; - if (txr != NULL) - txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; + if (error == EISCONN) { + if_printf(sc->hn_ifp, "bufring is connected after " + "chan%u open failure\n", vmbus_chan_id(chan)); + rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; + } else { + if_printf(sc->hn_ifp, "open chan%u failed: %d\n", + vmbus_chan_id(chan), error); + } } return (error); } static void hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; - int idx; + int idx, error; idx = vmbus_chan_subidx(chan); /* * Link this channel to RX/TX ring. */ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), ("RX ring %d is not attached", idx)); rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), ("TX ring %d is not attached", idx)); txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } /* * Close this channel. * * NOTE: * Channel closing does _not_ destroy the target channel. */ - vmbus_chan_close(chan); + error = vmbus_chan_close_direct(chan); + if (error == EISCONN) { + if_printf(sc->hn_ifp, "chan%u bufring is connected " + "after being closed\n", vmbus_chan_id(chan)); + rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; + } else if (error) { + if_printf(sc->hn_ifp, "chan%u close failed: %d\n", + vmbus_chan_id(chan), error); + } } static int hn_attach_subchans(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i, error = 0; - if (subchan_cnt == 0) - return (0); + KASSERT(subchan_cnt > 0, ("no sub-channels")); /* Attach the sub-channels. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) { - error = hn_chan_attach(sc, subchans[i]); - if (error) - break; + int error1; + + error1 = hn_chan_attach(sc, subchans[i]); + if (error1) { + error = error1; + /* Move on; all channels will be detached later. */ + } } vmbus_subchan_rel(subchans, subchan_cnt); if (error) { if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); } else { if (bootverbose) { if_printf(sc->hn_ifp, "%d sub-channels attached\n", subchan_cnt); } } return (error); } static void hn_detach_allchans(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i; if (subchan_cnt == 0) goto back; /* Detach the sub-channels. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) hn_chan_detach(sc, subchans[i]); vmbus_subchan_rel(subchans, subchan_cnt); back: /* * Detach the primary channel, _after_ all sub-channels * are detached. */ hn_chan_detach(sc, sc->hn_prichan); /* Wait for sub-channels to be destroyed, if any.
*/ vmbus_subchan_drain(sc->hn_prichan); #ifdef INVARIANTS for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { KASSERT((sc->hn_rx_ring[i].hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("%dth RX ring is still attached", i)); } for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { KASSERT((sc->hn_tx_ring[i].hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("%dth TX ring is still attached", i)); } #endif } static int hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) { struct vmbus_channel **subchans; int nchan, rxr_cnt, error; nchan = *nsubch + 1; if (nchan == 1) { /* * Multiple RX/TX rings are not requested. */ *nsubch = 0; return (0); } /* * Query RSS capabilities, e.g. # of RX rings, and # of indirect * table entries. */ error = hn_rndis_query_rsscaps(sc, &rxr_cnt); if (error) { /* No RSS; this is benign. */ *nsubch = 0; return (0); } if (bootverbose) { if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", rxr_cnt, nchan); } if (nchan > rxr_cnt) nchan = rxr_cnt; if (nchan == 1) { if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); *nsubch = 0; return (0); } /* * Allocate sub-channels from NVS. */ *nsubch = nchan - 1; error = hn_nvs_alloc_subchans(sc, nsubch); if (error || *nsubch == 0) { /* Failed to allocate sub-channels. */ *nsubch = 0; return (0); } /* * Wait for all sub-channels to become ready before moving on. */ subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); vmbus_subchan_rel(subchans, *nsubch); return (0); } +static bool +hn_synth_attachable(const struct hn_softc *sc) +{ + int i; + + if (sc->hn_flags & HN_FLAG_ERRORS) + return (false); + + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; + + if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) + return (false); + } + return (true); +} + static int hn_synth_attach(struct hn_softc *sc, int mtu) { +#define ATTACHED_NVS 0x0002 +#define ATTACHED_RNDIS 0x0004 + struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; int error, nsubch, nchan, i; - uint32_t old_caps; + uint32_t old_caps, attached = 0; KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, ("synthetic parts were attached")); + if (!hn_synth_attachable(sc)) + return (ENXIO); + /* Save capabilities for later verification. */ old_caps = sc->hn_caps; sc->hn_caps = 0; /* Clear RSS stuffs. */ sc->hn_rss_ind_size = 0; sc->hn_rss_hash = 0; /* * Attach the primary channel _before_ attaching NVS and RNDIS. */ error = hn_chan_attach(sc, sc->hn_prichan); if (error) - return (error); + goto failed; /* * Attach NVS. */ error = hn_nvs_attach(sc, mtu); if (error) - return (error); + goto failed; + attached |= ATTACHED_NVS; /* * Attach RNDIS _after_ NVS is attached. */ error = hn_rndis_attach(sc, mtu); if (error) - return (error); + goto failed; + attached |= ATTACHED_RNDIS; /* * Make sure capabilities are not changed. */ if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", old_caps, sc->hn_caps); - /* Restore old capabilities and abort. */ - sc->hn_caps = old_caps; - return ENXIO; + error = ENXIO; + goto failed; } /* * Allocate sub-channels for multi-TX/RX rings. * * NOTE: * The # of RX rings that can be used is equivalent to the # of * channels to be requested. */ nsubch = sc->hn_rx_ring_cnt - 1; error = hn_synth_alloc_subchans(sc, &nsubch); if (error) - return (error); + goto failed; + /* NOTE: _Full_ synthetic parts detach is required now. 
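+ * (From this point on, a failure must unwind through hn_synth_detach() + * in the failed: path below, not by detaching the pieces one by one.)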
*/ + sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; + /* + * Set the # of TX/RX rings that could be used according to + * the # of channels that NVS offered. + */ nchan = nsubch + 1; + hn_set_ring_inuse(sc, nchan); if (nchan == 1) { /* Only the primary channel can be used; done */ goto back; } /* - * Configure RSS key and indirect table _after_ all sub-channels - * are allocated. + * Attach the sub-channels. + * + * NOTE: hn_set_ring_inuse() _must_ have been called. */ + error = hn_attach_subchans(sc); + if (error) + goto failed; + /* + * Configure RSS key and indirect table _after_ all sub-channels + * are attached. + */ if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { /* * RSS key is not set yet; set it to the default RSS key. */ if (bootverbose) if_printf(sc->hn_ifp, "setup default RSS key\n"); memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); sc->hn_flags |= HN_FLAG_HAS_RSSKEY; } if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { /* * RSS indirect table is not set yet; set it up in round- * robin fashion. */ if (bootverbose) { if_printf(sc->hn_ifp, "setup default RSS indirect " "table\n"); } for (i = 0; i < NDIS_HASH_INDCNT; ++i) rss->rss_ind[i] = i % nchan; sc->hn_flags |= HN_FLAG_HAS_RSSIND; } else { /* * # of usable channels may be changed, so we have to * make sure that all entries in RSS indirect table * are valid. + * + * NOTE: hn_set_ring_inuse() _must_ have been called. */ - hn_rss_ind_fixup(sc, nchan); + hn_rss_ind_fixup(sc); } error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); - if (error) { - /* - * Failed to configure RSS key or indirect table; only - * the primary channel can be used. - */ - nchan = 1; - } + if (error) + goto failed; back: /* - * Set the # of TX/RX rings that could be used according to - * the # of channels that NVS offered. - */ - hn_set_ring_inuse(sc, nchan); - - /* - * Attach the sub-channels, if any. - */ - error = hn_attach_subchans(sc); - if (error) - return (error); - - /* * Fixup transmission aggregation setup. */ hn_set_txagg(sc); - - sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; return (0); + +failed: + if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { + hn_synth_detach(sc); + } else { + if (attached & ATTACHED_RNDIS) + hn_rndis_detach(sc); + if (attached & ATTACHED_NVS) + hn_nvs_detach(sc); + hn_chan_detach(sc, sc->hn_prichan); + /* Restore old capabilities. */ + sc->hn_caps = old_caps; + } + return (error); + +#undef ATTACHED_RNDIS +#undef ATTACHED_NVS } /* * NOTE: * The interface must have been suspended though hn_suspend(), before * this function get called. */ static void hn_synth_detach(struct hn_softc *sc) { - HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); /* Detach the RNDIS first. */ hn_rndis_detach(sc); /* Detach NVS. */ hn_nvs_detach(sc); /* Detach all of the channels. */ hn_detach_allchans(sc); sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; } static void hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) { KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, ("invalid ring count %d", ring_cnt)); if (sc->hn_tx_ring_cnt > ring_cnt) sc->hn_tx_ring_inuse = ring_cnt; else sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_rx_ring_inuse = ring_cnt; if (bootverbose) { if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); } } static void hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) { /* * NOTE: * The TX bufring will not be drained by the hypervisor, * if the primary channel is revoked. 
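 * (Hence the loop below skips the TX-empty check once the primary * channel has been revoked.)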
*/ while (!vmbus_chan_rx_empty(chan) || (!vmbus_chan_is_revoked(sc->hn_prichan) && !vmbus_chan_tx_empty(chan))) pause("waitch", 1); vmbus_chan_intr_drain(chan); } static void hn_suspend_data(struct hn_softc *sc) { struct vmbus_channel **subch = NULL; struct hn_tx_ring *txr; int i, nsubch; HN_LOCK_ASSERT(sc); /* * Suspend TX. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_suspended = 1; mtx_unlock(&txr->hn_tx_lock); /* No one is able to send more packets now. */ /* * Wait for all pending sends to finish. * * NOTE: * We will _not_ receive all pending send-done, if the * primary channel is revoked. */ while (hn_tx_ring_pending(txr) && !vmbus_chan_is_revoked(sc->hn_prichan)) pause("hnwtx", 1 /* 1 tick */); } /* * Disable RX by clearing RX filter. */ sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* * Give RNDIS enough time to flush all pending data packets. */ pause("waitrx", (200 * hz) / 1000); /* * Drain RX/TX bufrings and interrupts. */ nsubch = sc->hn_rx_ring_inuse - 1; if (nsubch > 0) subch = vmbus_subchan_get(sc->hn_prichan, nsubch); if (subch != NULL) { for (i = 0; i < nsubch; ++i) hn_chan_drain(sc, subch[i]); } hn_chan_drain(sc, sc->hn_prichan); if (subch != NULL) vmbus_subchan_rel(subch, nsubch); /* * Drain any pending TX tasks. * * NOTE: * The above hn_chan_drain() can dispatch TX tasks, so the TX * tasks will have to be drained _after_ the above hn_chan_drain() * calls. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) { ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; } static void hn_suspend_mgmt(struct hn_softc *sc) { struct task task; HN_LOCK_ASSERT(sc); /* * Make sure that hn_mgmt_taskq0 can no longer be accessed * through hn_mgmt_taskq. */ TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); vmbus_chan_run_task(sc->hn_prichan, &task); /* * Make sure that all pending management tasks are completed. */ taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); taskqueue_drain_all(sc->hn_mgmt_taskq0); } static void hn_suspend(struct hn_softc *sc) { if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) hn_suspend_data(sc); hn_suspend_mgmt(sc); } static void hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) { int i; KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, ("invalid TX ring count %d", tx_ring_cnt)); for (i = 0; i < tx_ring_cnt; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_suspended = 0; mtx_unlock(&txr->hn_tx_lock); } } static void hn_resume_data(struct hn_softc *sc) { int i; HN_LOCK_ASSERT(sc); /* * Re-enable RX. */ hn_set_rxfilter(sc); /* * Make sure to clear suspend status on "all" TX rings, * since hn_tx_ring_inuse can be changed after * hn_suspend_data(). */ hn_resume_tx(sc, sc->hn_tx_ring_cnt); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { /* * Flush unused drbrs, since hn_tx_ring_inuse may be * reduced. */ for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); } /* * Kick start TX. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; /* * Use txeof task, so that any pending oactive can be * cleared properly.
*/ taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_resume_mgmt(struct hn_softc *sc) { sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; /* * Kick off network change detection, if it was pending. * If no network change was pending, start link status * checks, which is more lightweight than network change * detection. */ if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) hn_change_network(sc); else hn_update_link_status(sc); } static void hn_resume(struct hn_softc *sc) { if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) hn_resume_data(sc); hn_resume_mgmt(sc); } static void hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) { const struct rndis_status_msg *msg; int ofs; if (dlen < sizeof(*msg)) { if_printf(sc->hn_ifp, "invalid RNDIS status\n"); return; } msg = data; switch (msg->rm_status) { case RNDIS_STATUS_MEDIA_CONNECT: case RNDIS_STATUS_MEDIA_DISCONNECT: hn_update_link_status(sc); break; case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: /* Not really useful; ignore. */ break; case RNDIS_STATUS_NETWORK_CHANGE: ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); if (dlen < ofs + msg->rm_stbuflen || msg->rm_stbuflen < sizeof(uint32_t)) { if_printf(sc->hn_ifp, "network changed\n"); } else { uint32_t change; memcpy(&change, ((const uint8_t *)msg) + ofs, sizeof(change)); if_printf(sc->hn_ifp, "network changed, change %u\n", change); } hn_change_network(sc); break; default: if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", msg->rm_status); break; } } static int hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) { const struct rndis_pktinfo *pi = info_data; uint32_t mask = 0; while (info_dlen != 0) { const void *data; uint32_t dlen; if (__predict_false(info_dlen < sizeof(*pi))) return (EINVAL); if (__predict_false(info_dlen < pi->rm_size)) return (EINVAL); info_dlen -= pi->rm_size; if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) return (EINVAL); if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) return (EINVAL); dlen = pi->rm_size - pi->rm_pktinfooffset; data = pi->rm_data; switch (pi->rm_type) { case NDIS_PKTINFO_TYPE_VLAN: if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) return (EINVAL); info->vlan_info = *((const uint32_t *)data); mask |= HN_RXINFO_VLAN; break; case NDIS_PKTINFO_TYPE_CSUM: if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) return (EINVAL); info->csum_info = *((const uint32_t *)data); mask |= HN_RXINFO_CSUM; break; case HN_NDIS_PKTINFO_TYPE_HASHVAL: if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) return (EINVAL); info->hash_value = *((const uint32_t *)data); mask |= HN_RXINFO_HASHVAL; break; case HN_NDIS_PKTINFO_TYPE_HASHINF: if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) return (EINVAL); info->hash_info = *((const uint32_t *)data); mask |= HN_RXINFO_HASHINF; break; default: goto next; } if (mask == HN_RXINFO_ALL) { /* All found; done */ break; } next: pi = (const struct rndis_pktinfo *) ((const uint8_t *)pi + pi->rm_size); } /* * Final fixup. * - If there is no hash value, invalidate the hash info. 
*/ if ((mask & HN_RXINFO_HASHVAL) == 0) info->hash_info = HN_NDIS_HASH_INFO_INVALID; return (0); } static __inline bool hn_rndis_check_overlap(int off, int len, int check_off, int check_len) { if (off < check_off) { if (__predict_true(off + len <= check_off)) return (false); } else if (off > check_off) { if (__predict_true(check_off + check_len <= off)) return (false); } return (true); } static void hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_packet_msg *pkt; struct hn_rxinfo info; int data_off, pktinfo_off, data_len, pktinfo_len; /* * Check length. */ if (__predict_false(dlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); return; } pkt = data; if (__predict_false(dlen < pkt->rm_len)) { if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " "dlen %d, msglen %u\n", dlen, pkt->rm_len); return; } if (__predict_false(pkt->rm_len < pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " "msglen %u, data %u, oob %u, pktinfo %u\n", pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, pkt->rm_pktinfolen); return; } if (__predict_false(pkt->rm_datalen == 0)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); return; } /* * Check offsets. */ #define IS_OFFSET_INVALID(ofs) \ ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) /* XXX Hyper-V does not meet data offset alignment requirement */ if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data offset %u\n", pkt->rm_dataoffset); return; } if (__predict_false(pkt->rm_oobdataoffset > 0 && IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob offset %u\n", pkt->rm_oobdataoffset); return; } if (__predict_true(pkt->rm_pktinfooffset > 0) && __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo offset %u\n", pkt->rm_pktinfooffset); return; } #undef IS_OFFSET_INVALID data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); data_len = pkt->rm_datalen; pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); pktinfo_len = pkt->rm_pktinfolen; /* * Check OOB coverage. */ if (__predict_false(pkt->rm_oobdatalen != 0)) { int oob_off, oob_len; if_printf(rxr->hn_ifp, "got oobdata\n"); oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); oob_len = pkt->rm_oobdatalen; if (__predict_false(oob_off + oob_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overflow, msglen %u, oob abs %d len %d\n", pkt->rm_len, oob_off, oob_len); return; } /* * Check against data. */ if (hn_rndis_check_overlap(oob_off, oob_len, data_off, data_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps data, oob abs %d len %d, " "data abs %d len %d\n", oob_off, oob_len, data_off, data_len); return; } /* * Check against pktinfo. */ if (pktinfo_len != 0 && hn_rndis_check_overlap(oob_off, oob_len, pktinfo_off, pktinfo_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps pktinfo, oob abs %d len %d, " "pktinfo abs %d len %d\n", oob_off, oob_len, pktinfo_off, pktinfo_len); return; } } /* * Check per-packet-info coverage and find useful per-packet-info.
*/ info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; info.hash_info = HN_NDIS_HASH_INFO_INVALID; if (__predict_true(pktinfo_len != 0)) { bool overlap; int error; if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overflow, msglen %u, " "pktinfo abs %d len %d\n", pkt->rm_len, pktinfo_off, pktinfo_len); return; } /* * Check packet info coverage. */ overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, data_off, data_len); if (__predict_false(overlap)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overlap data, pktinfo abs %d len %d, " "data abs %d len %d\n", pktinfo_off, pktinfo_len, data_off, data_len); return; } /* * Find useful per-packet-info. */ error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, pktinfo_len, &info); if (__predict_false(error)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " "pktinfo\n"); return; } } if (__predict_false(data_off + data_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data overflow, msglen %u, data abs %d len %d\n", pkt->rm_len, data_off, data_len); return; } hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); } static __inline void hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_msghdr *hdr; if (__predict_false(dlen < sizeof(*hdr))) { if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); return; } hdr = data; if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { /* Hot data path. */ hn_rndis_rx_data(rxr, data, dlen); /* Done! */ return; } if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); else hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); } static void hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) { const struct hn_nvs_hdr *hdr; if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { if_printf(sc->hn_ifp, "invalid nvs notify\n"); return; } hdr = VMBUS_CHANPKT_CONST_DATA(pkt); if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { /* Useless; ignore */ return; } if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); } static void hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt) { struct hn_nvs_sendctx *sndc; sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), VMBUS_CHANPKT_DATALEN(pkt)); /* * NOTE: * 'sndc' CAN NOT be accessed anymore, since it can be freed by * its callback. */ } static void hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkthdr) { const struct vmbus_chanpkt_rxbuf *pkt; const struct hn_nvs_hdr *nvs_hdr; int count, i, hlen; if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); return; } nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); /* Make sure that this is a RNDIS message. 
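 * (HN_NVS_TYPE_RNDIS is the only nvs type expected on this RXBUF * path; anything else is logged and dropped below.)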
*/ if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", nvs_hdr->nvs_type); return; } hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); if (__predict_false(hlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); return; } pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", pkt->cp_rxbuf_id); return; } count = pkt->cp_rxbuf_cnt; if (__predict_false(hlen < __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); return; } /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ for (i = 0; i < count; ++i) { int ofs, len; ofs = pkt->cp_rxbuf[i].rb_ofs; len = pkt->cp_rxbuf[i].rb_len; if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " "ofs %d, len %d\n", i, ofs, len); continue; } hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); } /* * Ack the consumed RXBUF associated w/ this channel packet, * so that this RXBUF can be recycled by the hypervisor. */ hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); } static void hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, uint64_t tid) { struct hn_nvs_rndis_ack ack; int retries, error; ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; ack.nvs_status = HN_NVS_STATUS_OK; retries = 0; again: error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); if (__predict_false(error == EAGAIN)) { /* * NOTE: * This should _not_ happen in real world, since the * consumption of the TX bufring from the TX path is * controlled. */ if (rxr->hn_ack_failed == 0) if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); rxr->hn_ack_failed++; retries++; if (retries < 10) { DELAY(100); goto again; } /* RXBUF leaks! */ if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); } } static void hn_chan_callback(struct vmbus_channel *chan, void *xrxr) { struct hn_rx_ring *rxr = xrxr; struct hn_softc *sc = rxr->hn_ifp->if_softc; for (;;) { struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; int error, pktlen; pktlen = rxr->hn_pktbuf_len; error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); if (__predict_false(error == ENOBUFS)) { void *nbuf; int nlen; /* * Expand channel packet buffer. * * XXX * Use M_WAITOK here, since allocation failure * is fatal. */ nlen = rxr->hn_pktbuf_len * 2; while (nlen < pktlen) nlen *= 2; nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", rxr->hn_pktbuf_len, nlen); free(rxr->hn_pktbuf, M_DEVBUF); rxr->hn_pktbuf = nbuf; rxr->hn_pktbuf_len = nlen; /* Retry! */ continue; } else if (__predict_false(error == EAGAIN)) { /* No more channel packets; done! 
*/ break; } KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); switch (pkt->cph_type) { case VMBUS_CHANPKT_TYPE_COMP: hn_nvs_handle_comp(sc, chan, pkt); break; case VMBUS_CHANPKT_TYPE_RXBUF: hn_nvs_handle_rxbuf(rxr, chan, pkt); break; case VMBUS_CHANPKT_TYPE_INBAND: hn_nvs_handle_notify(sc, pkt); break; default: if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", pkt->cph_type); break; } } hn_chan_rollup(rxr, rxr->hn_txr); } static void hn_tx_taskq_create(void *arg __unused) { if (vm_guest != VM_GUEST_HV) return; if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_tx_taskq_destroy, NULL); Index: projects/clang391-import/sys/dev/hyperv/netvsc/if_hnvar.h =================================================================== --- projects/clang391-import/sys/dev/hyperv/netvsc/if_hnvar.h (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/netvsc/if_hnvar.h (revision 309263) @@ -1,273 +1,278 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _IF_HNVAR_H_ #define _IF_HNVAR_H_ #define HN_USE_TXDESC_BUFRING #define HN_CHIM_SIZE (15 * 1024 * 1024) #define HN_RXBUF_SIZE (16 * 1024 * 1024) #define HN_RXBUF_SIZE_COMPAT (15 * 1024 * 1024) /* Claimed to be 12232B */ #define HN_MTU_MAX (9 * 1024) #define HN_TXBR_SIZE (128 * PAGE_SIZE) #define HN_RXBR_SIZE (128 * PAGE_SIZE) #define HN_XACT_REQ_PGCNT 2 #define HN_XACT_RESP_PGCNT 2 #define HN_XACT_REQ_SIZE (HN_XACT_REQ_PGCNT * PAGE_SIZE) #define HN_XACT_RESP_SIZE (HN_XACT_RESP_PGCNT * PAGE_SIZE) #define HN_GPACNT_MAX 32 struct hn_txdesc; #ifndef HN_USE_TXDESC_BUFRING SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_tx_ring; struct hn_rx_ring { struct ifnet *hn_ifp; struct hn_tx_ring *hn_txr; void *hn_pktbuf; int hn_pktbuf_len; uint8_t *hn_rxbuf; /* shadow sc->hn_rxbuf */ int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; u_long hn_rss_pkts; u_long hn_ack_failed; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; void *hn_br; /* TX/RX bufring */ struct hyperv_dma hn_br_dma; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 -#define HN_RX_FLAG_ATTACHED 0x1 +#define HN_RX_FLAG_ATTACHED 0x0001 +#define HN_RX_FLAG_BR_REF 0x0002 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; #endif int hn_txdesc_cnt; int hn_txdesc_avail; u_short hn_has_txeof; u_short hn_txdone_cnt; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; int hn_tx_flags; struct mtx hn_tx_lock; struct hn_softc *hn_sc; struct vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_chim_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; /* Applied packet transmission aggregation limits. */ int hn_agg_szmax; short hn_agg_pktmax; short hn_agg_align; /* Packet transmission aggregation states. */ struct hn_txdesc *hn_agg_txd; int hn_agg_szleft; short hn_agg_pktleft; struct rndis_packet_msg *hn_agg_prevpkt; /* Temporary stats for each sends. 
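 *
 * (These appear to accumulate while one aggregated send is being
 * built -- total payload size, frame count, multicast count -- and
 * to be folded into the ring-lifetime counters once the aggregate
 * is flushed, along the lines of this assumed sketch:
 *
 *	txr->hn_pkts += txr->hn_stat_pkts;
 *	txr->hn_stat_pkts = 0;
 *	txr->hn_stat_size = 0;
 *	txr->hn_stat_mcasts = 0;
 *
 * i.e. "temporary" here means per-send scratch.)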
*/ int hn_stat_size; short hn_stat_pkts; short hn_stat_mcasts; int (*hn_sendpkt)(struct hn_tx_ring *, struct hn_txdesc *); int hn_suspended; int hn_gpa_cnt; struct vmbus_gpa hn_gpa[HN_GPACNT_MAX]; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney_tried; u_long hn_tx_chimney; u_long hn_pkts; u_long hn_sends; u_long hn_flush_failed; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; } __aligned(CACHE_LINE_SIZE); -#define HN_TX_FLAG_ATTACHED 0x1 -#define HN_TX_FLAG_HASHVAL 0x2 /* support HASHVAL pktinfo */ +#define HN_TX_FLAG_ATTACHED 0x0001 +#define HN_TX_FLAG_HASHVAL 0x0002 /* support HASHVAL pktinfo */ /* * Device-specific softc structure */ struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; int hn_if_flags; struct sx hn_lock; struct vmbus_channel *hn_prichan; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; uint8_t *hn_chim; u_long *hn_chim_bmap; int hn_chim_bmap_cnt; int hn_chim_cnt; int hn_chim_szmax; int hn_cpu; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; struct vmbus_xact_ctx *hn_xact; uint32_t hn_nvs_ver; uint32_t hn_rx_filter; /* Packet transmission aggregation user settings. */ int hn_agg_size; int hn_agg_pkts; struct taskqueue *hn_mgmt_taskq; struct taskqueue *hn_mgmt_taskq0; struct task hn_link_task; struct task hn_netchg_init; struct timeout_task hn_netchg_status; uint32_t hn_link_flags; /* HN_LINK_FLAG_ */ uint32_t hn_caps; /* HN_CAP_ */ uint32_t hn_flags; /* HN_FLAG_ */ void *hn_rxbuf; uint32_t hn_rxbuf_gpadl; struct hyperv_dma hn_rxbuf_dma; uint32_t hn_chim_gpadl; struct hyperv_dma hn_chim_dma; uint32_t hn_rndis_rid; uint32_t hn_ndis_ver; int hn_ndis_tso_szmax; int hn_ndis_tso_sgmin; uint32_t hn_rndis_agg_size; uint32_t hn_rndis_agg_pkts; uint32_t hn_rndis_agg_align; int hn_rss_ind_size; uint32_t hn_rss_hash; /* NDIS_HASH_ */ struct ndis_rssprm_toeplitz hn_rss; }; #define HN_FLAG_RXBUF_CONNECTED 0x0001 #define HN_FLAG_CHIM_CONNECTED 0x0002 #define HN_FLAG_HAS_RSSKEY 0x0004 #define HN_FLAG_HAS_RSSIND 0x0008 #define HN_FLAG_SYNTH_ATTACHED 0x0010 #define HN_FLAG_NO_SLEEPING 0x0020 +#define HN_FLAG_RXBUF_REF 0x0040 +#define HN_FLAG_CHIM_REF 0x0080 + +#define HN_FLAG_ERRORS (HN_FLAG_RXBUF_REF | HN_FLAG_CHIM_REF) #define HN_NO_SLEEPING(sc) \ do { \ (sc)->hn_flags |= HN_FLAG_NO_SLEEPING; \ } while (0) #define HN_SLEEPING_OK(sc) \ do { \ (sc)->hn_flags &= ~HN_FLAG_NO_SLEEPING; \ } while (0) #define HN_CAN_SLEEP(sc) \ (((sc)->hn_flags & HN_FLAG_NO_SLEEPING) == 0) #define HN_CAP_VLAN 0x0001 #define HN_CAP_MTU 0x0002 #define HN_CAP_IPCS 0x0004 #define HN_CAP_TCP4CS 0x0008 #define HN_CAP_TCP6CS 0x0010 #define HN_CAP_UDP4CS 0x0020 #define HN_CAP_UDP6CS 0x0040 #define HN_CAP_TSO4 0x0080 #define HN_CAP_TSO6 0x0100 #define HN_CAP_HASHVAL 0x0200 /* Capability description for use with printf(9) %b identifier. 
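 *
 * A sketch of the intended use (any kernel printf-family function
 * accepting %b will do):
 *
 *	if_printf(sc->hn_ifp, "caps: %b\n", sc->hn_caps, HN_CAP_BITS);
 *
 * With hn_caps = HN_CAP_VLAN | HN_CAP_MTU this prints "3<VLAN,MTU>":
 * the leading \020 selects base-16 output and each \N introduces
 * the name of bit N (1-based).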
*/ #define HN_CAP_BITS \ "\020\1VLAN\2MTU\3IPCS\4TCP4CS\5TCP6CS" \ "\6UDP4CS\7UDP6CS\10TSO4\11TSO6\12HASHVAL" #define HN_LINK_FLAG_LINKUP 0x0001 #define HN_LINK_FLAG_NETCHG 0x0002 #endif /* !_IF_HNVAR_H_ */ Index: projects/clang391-import/sys/dev/hyperv/vmbus/vmbus.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/vmbus/vmbus.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/vmbus/vmbus.c (revision 309263) @@ -1,1462 +1,1483 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "pcib_if.h" #include "vmbus_if.h" #define VMBUS_GPADL_START 0xe1e10 struct vmbus_msghc { struct vmbus_xact *mh_xact; struct hypercall_postmsg_in mh_inprm_save; }; static int vmbus_probe(device_t); static int vmbus_attach(device_t); static int vmbus_detach(device_t); static int vmbus_read_ivar(device_t, device_t, int, uintptr_t *); static int vmbus_child_pnpinfo_str(device_t, device_t, char *, size_t); static struct resource *vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); static int vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs); static int vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs); static int vmbus_alloc_msix(device_t bus, device_t dev, int *irq); static int vmbus_release_msix(device_t bus, device_t dev, int irq); static int vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr, uint32_t *data); static uint32_t vmbus_get_version_method(device_t, device_t); static int vmbus_probe_guid_method(device_t, device_t, const struct hyperv_guid *); static uint32_t vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu); static int vmbus_init(struct vmbus_softc *); static int vmbus_connect(struct vmbus_softc *, uint32_t); static int vmbus_req_channels(struct vmbus_softc *sc); static void vmbus_disconnect(struct vmbus_softc *); static int vmbus_scan(struct vmbus_softc *); static void vmbus_scan_teardown(struct vmbus_softc *); static void vmbus_scan_done(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chanmsg_handle(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_msg_task(void *, int); static void vmbus_synic_setup(void *); static void vmbus_synic_teardown(void *); static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS); static int vmbus_dma_alloc(struct vmbus_softc *); static void vmbus_dma_free(struct vmbus_softc *); static int vmbus_intr_setup(struct vmbus_softc *); static void vmbus_intr_teardown(struct vmbus_softc *); static int vmbus_doattach(struct vmbus_softc *); static void vmbus_event_proc_dummy(struct vmbus_softc *, int); static struct vmbus_softc *vmbus_sc; extern inthand_t IDTVEC(vmbus_isr); static const uint32_t vmbus_version[] = { VMBUS_VERSION_WIN8_1, VMBUS_VERSION_WIN8, VMBUS_VERSION_WIN7, VMBUS_VERSION_WS2008 }; static const vmbus_chanmsg_proc_t vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done), VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP) }; static device_method_t vmbus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), DEVMETHOD(bus_alloc_resource, vmbus_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), 
DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), #if __FreeBSD_version >= 1100000 DEVMETHOD(bus_get_cpus, bus_generic_get_cpus), #endif /* pcib interface */ DEVMETHOD(pcib_alloc_msi, vmbus_alloc_msi), DEVMETHOD(pcib_release_msi, vmbus_release_msi), DEVMETHOD(pcib_alloc_msix, vmbus_alloc_msix), DEVMETHOD(pcib_release_msix, vmbus_release_msix), DEVMETHOD(pcib_map_msi, vmbus_map_msi), /* Vmbus interface */ DEVMETHOD(vmbus_get_version, vmbus_get_version_method), DEVMETHOD(vmbus_probe_guid, vmbus_probe_guid_method), DEVMETHOD(vmbus_get_vcpu_id, vmbus_get_vcpu_id_method), DEVMETHOD_END }; static driver_t vmbus_driver = { "vmbus", vmbus_methods, sizeof(struct vmbus_softc) }; static devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL); MODULE_DEPEND(vmbus, acpi, 1, 1, 1); MODULE_DEPEND(vmbus, pci, 1, 1, 1); MODULE_VERSION(vmbus, 1); static __inline struct vmbus_softc * vmbus_get_softc(void) { return vmbus_sc; } void vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) { struct hypercall_postmsg_in *inprm; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); inprm = vmbus_xact_req_data(mh->mh_xact); memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE); inprm->hc_connid = VMBUS_CONNID_MESSAGE; inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL; inprm->hc_dsize = dsize; } struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize) { struct vmbus_msghc *mh; struct vmbus_xact *xact; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); xact = vmbus_xact_get(sc->vmbus_xc, dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0])); if (xact == NULL) return (NULL); mh = vmbus_xact_priv(xact, sizeof(*mh)); mh->mh_xact = xact; vmbus_msghc_reset(mh, dsize); return (mh); } void vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { vmbus_xact_put(mh->mh_xact); } void * vmbus_msghc_dataptr(struct vmbus_msghc *mh) { struct hypercall_postmsg_in *inprm; inprm = vmbus_xact_req_data(mh->mh_xact); return (inprm->hc_data); } int vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) { sbintime_t time = SBT_1MS; struct hypercall_postmsg_in *inprm; bus_addr_t inprm_paddr; int i; inprm = vmbus_xact_req_data(mh->mh_xact); inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact); /* * Save the input parameter so that we could restore the input * parameter if the Hypercall failed. * * XXX * Is this really necessary?! i.e. Will the Hypercall ever * overwrite the input parameter? */ memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE); /* * In order to cope with transient failures, e.g. insufficient * resources on host side, we retry the post message Hypercall * several times. 20 retries seem sufficient. 
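 *
 * The inter-retry pause (see below) starts at 1ms and doubles up
 * to a 2s ceiling, so the 20 retries pause for roughly
 * 1 + 2 + 4 + ... + 1024 ms plus nine 2s waits -- about 20
 * seconds in total before giving up with EIO.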
*/ #define HC_RETRY_MAX 20 for (i = 0; i < HC_RETRY_MAX; ++i) { uint64_t status; status = hypercall_post_message(inprm_paddr); if (status == HYPERCALL_STATUS_SUCCESS) return 0; pause_sbt("hcpmsg", time, 0, C_HARDCLOCK); if (time < SBT_1S * 2) time *= 2; /* Restore input parameter and try again */ memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE); } #undef HC_RETRY_MAX return EIO; } int vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { int error; vmbus_xact_activate(mh->mh_xact); error = vmbus_msghc_exec_noresult(mh); if (error) vmbus_xact_deactivate(mh->mh_xact); return error; } +void +vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) +{ + + vmbus_xact_deactivate(mh->mh_xact); +} + const struct vmbus_message * vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { size_t resp_len; return (vmbus_xact_wait(mh->mh_xact, &resp_len)); } +const struct vmbus_message * +vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) +{ + size_t resp_len; + + return (vmbus_xact_poll(mh->mh_xact, &resp_len)); +} + void vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg)); } uint32_t vmbus_gpadl_alloc(struct vmbus_softc *sc) { - return atomic_fetchadd_int(&sc->vmbus_gpadl, 1); + uint32_t gpadl; + +again: + gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1); + if (gpadl == 0) + goto again; + return (gpadl); } static int vmbus_connect(struct vmbus_softc *sc, uint32_t version) { struct vmbus_chanmsg_connect *req; const struct vmbus_message *msg; struct vmbus_msghc *mh; int error, done = 0; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT; req->chm_ver = version; req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr; req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr; req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_msghc_put(sc, mh); return error; } msg = vmbus_msghc_wait_result(sc, mh); done = ((const struct vmbus_chanmsg_connect_resp *) msg->msg_data)->chm_done; vmbus_msghc_put(sc, mh); return (done ? 
0 : EOPNOTSUPP); } static int vmbus_init(struct vmbus_softc *sc) { int i; for (i = 0; i < nitems(vmbus_version); ++i) { int error; error = vmbus_connect(sc, vmbus_version[i]); if (!error) { sc->vmbus_version = vmbus_version[i]; device_printf(sc->vmbus_dev, "version %u.%u\n", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return 0; } } return ENXIO; } static void vmbus_disconnect(struct vmbus_softc *sc) { struct vmbus_chanmsg_disconnect *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for disconnect\n"); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "disconnect msg hypercall failed\n"); } } static int vmbus_req_channels(struct vmbus_softc *sc) { struct vmbus_chanmsg_chrequest *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); return error; } static void vmbus_scan_done_task(void *xsc, int pending __unused) { struct vmbus_softc *sc = xsc; mtx_lock(&Giant); sc->vmbus_scandone = true; mtx_unlock(&Giant); wakeup(&sc->vmbus_scandone); } static void vmbus_scan_done(struct vmbus_softc *sc, const struct vmbus_message *msg __unused) { taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task); } static int vmbus_scan(struct vmbus_softc *sc) { int error; /* * Identify, probe and attach for non-channel devices. */ bus_generic_probe(sc->vmbus_dev); bus_generic_attach(sc->vmbus_dev); /* * This taskqueue serializes vmbus devices' attach and detach * for channel offer and rescind messages. */ sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK, taskqueue_thread_enqueue, &sc->vmbus_devtq); taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev"); TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc); /* * This taskqueue handles sub-channel detach, so that vmbus * device's detach running in vmbus_devtq can drain its sub- * channels. */ sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK, taskqueue_thread_enqueue, &sc->vmbus_subchtq); taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch"); /* * Start vmbus scanning. */ error = vmbus_req_channels(sc); if (error) { device_printf(sc->vmbus_dev, "channel request failed: %d\n", error); return (error); } /* * Wait for all vmbus devices from the initial channel offers to be * attached. 
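 *
 * This is the classic flag-plus-sleep handshake: below we
 * mtx_sleep() on &sc->vmbus_scandone under Giant, and
 * vmbus_scan_done_task() above performs the matching side:
 *
 *	mtx_lock(&Giant);
 *	sc->vmbus_scandone = true;
 *	mtx_unlock(&Giant);
 *	wakeup(&sc->vmbus_scandone);
 *
 * Re-testing the flag in a loop guards against spurious wakeups.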
*/ GIANT_REQUIRED; while (!sc->vmbus_scandone) mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0); if (bootverbose) { device_printf(sc->vmbus_dev, "device scan, probe and attach " "done\n"); } return (0); } static void vmbus_scan_teardown(struct vmbus_softc *sc) { GIANT_REQUIRED; if (sc->vmbus_devtq != NULL) { mtx_unlock(&Giant); taskqueue_free(sc->vmbus_devtq); mtx_lock(&Giant); sc->vmbus_devtq = NULL; } if (sc->vmbus_subchtq != NULL) { mtx_unlock(&Giant); taskqueue_free(sc->vmbus_subchtq); mtx_lock(&Giant); sc->vmbus_subchtq = NULL; } } static void vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) { device_printf(sc->vmbus_dev, "unknown message type 0x%x\n", msg_type); return; } msg_proc = vmbus_chanmsg_handlers[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); /* Channel specific processing */ vmbus_chan_msgproc(sc, msg); } static void vmbus_msg_task(void *xsc, int pending __unused) { struct vmbus_softc *sc = xsc; volatile struct vmbus_message *msg; msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE; for (;;) { if (msg->msg_type == HYPERV_MSGTYPE_NONE) { /* No message */ break; } else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) { /* Channel message */ vmbus_chanmsg_handle(sc, __DEVOLATILE(const struct vmbus_message *, msg)); } msg->msg_type = HYPERV_MSGTYPE_NONE; /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } } static __inline int vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu) { volatile struct vmbus_message *msg; struct vmbus_message *msg_base; msg_base = VMBUS_PCPU_GET(sc, message, cpu); /* * Check event timer. * * TODO: move this to independent IDT vector. */ msg = msg_base + VMBUS_SINT_TIMER; if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) { msg->msg_type = HYPERV_MSGTYPE_NONE; vmbus_et_intr(frame); /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } /* * Check events. Hot path for network and storage I/O data; high rate. * * NOTE: * As recommended by the Windows guest fellows, we check events before * checking messages. */ sc->vmbus_event_proc(sc, cpu); /* * Check messages. Mainly management stuffs; ultra low rate. 
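 *
 * (The SynIC message-slot discipline used by vmbus_msg_task()
 * above, in sketch form -- free the slot, fence, then EOM:
 *
 *	msg->msg_type = HYPERV_MSGTYPE_NONE;
 *	mb();
 *	if (msg->msg_flags & VMBUS_MSGFLAG_PENDING)
 *		wrmsr(MSR_HV_EOM, 0);
 *
 * Writing MSR_HV_EOM before the slot is visibly empty could make
 * the redelivery find no free slot and stall the queue.)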
*/ msg = msg_base + VMBUS_SINT_MESSAGE; if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) { taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); } return (FILTER_HANDLED); } void vmbus_handle_intr(struct trapframe *trap_frame) { struct vmbus_softc *sc = vmbus_get_softc(); int cpu = curcpu; /* * Disable preemption. */ critical_enter(); /* * Do a little interrupt counting. */ (*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++; vmbus_handle_intr1(sc, trap_frame, cpu); /* * Enable preemption. */ critical_exit(); } static void vmbus_synic_setup(void *xsc) { struct vmbus_softc *sc = xsc; int cpu = curcpu; uint64_t val, orig; uint32_t sint; if (hyperv_features & CPUID_HV_MSR_VP_INDEX) { /* Save virtual processor id. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX); } else { /* Set virtual processor id to 0 for compatibility. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0; } /* * Setup the SynIC message. */ orig = rdmsr(MSR_HV_SIMP); val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT); wrmsr(MSR_HV_SIMP, val); /* * Setup the SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT); wrmsr(MSR_HV_SIEFP, val); /* * Configure and unmask SINT for message and event flags. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * Configure and unmask SINT for timer. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * All done; enable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK); wrmsr(MSR_HV_SCONTROL, val); } static void vmbus_synic_teardown(void *arg) { uint64_t orig; uint32_t sint; /* * Disable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK)); /* * Mask message and event flags SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Mask timer SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Teardown SynIC message. */ orig = rdmsr(MSR_HV_SIMP); wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK)); /* * Teardown SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK)); } static int vmbus_dma_alloc(struct vmbus_softc *sc) { bus_dma_tag_t parent_dtag; uint8_t *evtflags; int cpu; parent_dtag = bus_get_dma_tag(sc->vmbus_dev); CPU_FOREACH(cpu) { void *ptr; /* * Per-cpu messages and event flags. 
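 *
 * These are the pages programmed into MSR_HV_SIMP/MSR_HV_SIEFP by
 * vmbus_synic_setup() above, which uses a read-modify-write so the
 * reserved MSR bits survive, e.g. for the message page:
 *
 *	orig = rdmsr(MSR_HV_SIMP);
 *	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
 *	    ((paddr >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT);
 *	wrmsr(MSR_HV_SIMP, val);
 *
 * Hence the page-sized, page-aligned allocations below.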
*/ ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, message, cpu) = ptr; ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr; } evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (evtflags == NULL) return ENOMEM; sc->vmbus_rx_evtflags = (u_long *)evtflags; sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2)); sc->vmbus_evtflags = evtflags; sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf1 == NULL) return ENOMEM; sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf2 == NULL) return ENOMEM; return 0; } static void vmbus_dma_free(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_evtflags != NULL) { hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags); sc->vmbus_evtflags = NULL; sc->vmbus_rx_evtflags = NULL; sc->vmbus_tx_evtflags = NULL; } if (sc->vmbus_mnf1 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1); sc->vmbus_mnf1 = NULL; } if (sc->vmbus_mnf2 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2); sc->vmbus_mnf2 = NULL; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, message_dma, cpu), VMBUS_PCPU_GET(sc, message, cpu)); VMBUS_PCPU_GET(sc, message, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), VMBUS_PCPU_GET(sc, event_flags, cpu)); VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL; } } } static int vmbus_intr_setup(struct vmbus_softc *sc) { int cpu; CPU_FOREACH(cpu) { char buf[MAXCOMLEN + 1]; cpuset_t cpu_mask; /* Allocate an interrupt counter for Hyper-V interrupt */ snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu); intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu)); /* * Setup taskqueue to handle events. Task will be per- * channel. */ VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast( "hyperv event", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, event_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask, "hvevent%d", cpu); /* * Setup tasks and taskqueues to handle messages. */ VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast( "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, message_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask, "hvmsg%d", cpu); TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0, vmbus_msg_task, sc); } /* * All Hyper-V ISR required resources are setup, now let's find a * free IDT vector for Hyper-V ISR and set it up. 
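 *
 * (Note how each per-cpu taskqueue above is pinned with the cpuset
 * variant, so its single worker thread only runs on the CPU whose
 * interrupts feed it:
 *
 *	CPU_SETOF(cpu, &cpu_mask);
 *	taskqueue_start_threads_cpuset(
 *	    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET,
 *	    &cpu_mask, "hvmsg%d", cpu);
 *
 * keeping message and event handling CPU-local.)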
*/ sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr)); if (sc->vmbus_idtvec < 0) { device_printf(sc->vmbus_dev, "cannot find free IDT vector\n"); return ENXIO; } if (bootverbose) { device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n", sc->vmbus_idtvec); } return 0; } static void vmbus_intr_teardown(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_idtvec >= 0) { lapic_ipi_free(sc->vmbus_idtvec); sc->vmbus_idtvec = -1; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) { taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu)); VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) { taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu)); VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL; } } } static int vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { return (ENOENT); } static int vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) { const struct vmbus_channel *chan; char guidbuf[HYPERV_GUID_STRLEN]; chan = vmbus_get_channel(child); if (chan == NULL) { /* Event timer device, which does not belong to a channel */ return (0); } strlcat(buf, "classid=", buflen); hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); strlcat(buf, " deviceid=", buflen); hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); return (0); } int vmbus_add_child(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; device_t parent = sc->vmbus_dev; mtx_lock(&Giant); chan->ch_dev = device_add_child(parent, NULL, -1); if (chan->ch_dev == NULL) { mtx_unlock(&Giant); device_printf(parent, "device_add_child for chan%u failed\n", chan->ch_id); return (ENXIO); } device_set_ivars(chan->ch_dev, chan); device_probe_and_attach(chan->ch_dev); mtx_unlock(&Giant); return (0); } int vmbus_delete_child(struct vmbus_channel *chan) { int error = 0; mtx_lock(&Giant); if (chan->ch_dev != NULL) { error = device_delete_child(chan->ch_vmbus->vmbus_dev, chan->ch_dev); chan->ch_dev = NULL; } mtx_unlock(&Giant); return (error); } static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS) { struct vmbus_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } /* * We need the function to make sure the MMIO resource is allocated from the * ranges found in _CRS. * * For the release function, we can use bus_generic_release_resource(). 
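 *
 * A hypothetical child asking for MMIO would then be satisfied
 * only from ranges that _CRS declared decodable, e.g.:
 *
 *	res = bus_alloc_resource(child, SYS_RES_MEMORY, &rid,
 *	    0, ~0, size, RF_ACTIVE);
 *
 * while all other resource types fall through to the parent bus
 * via BUS_ALLOC_RESOURCE().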
*/ static struct resource * vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { device_t parent = device_get_parent(dev); struct resource *res; #ifdef NEW_PCIB if (type == SYS_RES_MEMORY) { struct vmbus_softc *sc = device_get_softc(dev); res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type, rid, start, end, count, flags); } else #endif { res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start, end, count, flags); } return (res); } static device_t get_nexus(device_t vmbus) { device_t acpi = device_get_parent(vmbus); device_t nexus = device_get_parent(acpi); return (nexus); } static int vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs) { return (PCIB_ALLOC_MSI(get_nexus(bus), dev, count, maxcount, irqs)); } static int vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs) { return (PCIB_RELEASE_MSI(get_nexus(bus), dev, count, irqs)); } static int vmbus_alloc_msix(device_t bus, device_t dev, int *irq) { return (PCIB_ALLOC_MSIX(get_nexus(bus), dev, irq)); } static int vmbus_release_msix(device_t bus, device_t dev, int irq) { return (PCIB_RELEASE_MSIX(get_nexus(bus), dev, irq)); } static int vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr, uint32_t *data) { return (PCIB_MAP_MSI(get_nexus(bus), dev, irq, addr, data)); } static uint32_t vmbus_get_version_method(device_t bus, device_t dev) { struct vmbus_softc *sc = device_get_softc(bus); return sc->vmbus_version; } static int vmbus_probe_guid_method(device_t bus, device_t dev, const struct hyperv_guid *guid) { const struct vmbus_channel *chan = vmbus_get_channel(dev); if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0) return 0; return ENXIO; } static uint32_t vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu) { const struct vmbus_softc *sc = device_get_softc(bus); return (VMBUS_PCPU_GET(sc, vcpuid, cpu)); } #ifdef NEW_PCIB #define VTPM_BASE_ADDR 0xfed40000 #define FOUR_GB (1ULL << 32) enum parse_pass { parse_64, parse_32 }; struct parse_context { device_t vmbus_dev; enum parse_pass pass; }; static ACPI_STATUS parse_crs(ACPI_RESOURCE *res, void *ctx) { const struct parse_context *pc = ctx; device_t vmbus_dev = pc->vmbus_dev; struct vmbus_softc *sc = device_get_softc(vmbus_dev); UINT64 start, end; switch (res->Type) { case ACPI_RESOURCE_TYPE_ADDRESS32: start = res->Data.Address32.Address.Minimum; end = res->Data.Address32.Address.Maximum; break; case ACPI_RESOURCE_TYPE_ADDRESS64: start = res->Data.Address64.Address.Minimum; end = res->Data.Address64.Address.Maximum; break; default: /* Unused types. */ return (AE_OK); } /* * We don't use <1MB addresses. */ if (end < 0x100000) return (AE_OK); /* Don't conflict with vTPM. 
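 *
 * If a decoded range straddles the fixed vTPM window, the check
 * below clips it to end just under VTPM_BASE_ADDR (0xfed40000):
 *
 *	[start ............ VTPM_BASE_ADDR ............ end]
 *	becomes
 *	[start ... VTPM_BASE_ADDR - 1]
 *
 * so MMIO later handed to children can never overlap the TPM.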
*/ if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR) end = VTPM_BASE_ADDR - 1; if ((pc->pass == parse_32 && start < FOUR_GB) || (pc->pass == parse_64 && start >= FOUR_GB)) pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY, start, end, 0); return (AE_OK); } static void vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass) { struct parse_context pc; ACPI_STATUS status; if (bootverbose) device_printf(dev, "walking _CRS, pass=%d\n", pass); pc.vmbus_dev = vmbus_dev; pc.pass = pass; status = AcpiWalkResources(acpi_get_handle(dev), "_CRS", parse_crs, &pc); if (bootverbose && ACPI_FAILURE(status)) device_printf(dev, "_CRS: not found, pass=%d\n", pass); } static void vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass) { device_t acpi0, pcib0 = NULL; device_t *children; int i, count; /* Try to find _CRS on VMBus device */ vmbus_get_crs(dev, dev, pass); /* Try to find _CRS on VMBus device's parent */ acpi0 = device_get_parent(dev); vmbus_get_crs(acpi0, dev, pass); /* Try to locate pcib0 and find _CRS on it */ if (device_get_children(acpi0, &children, &count) != 0) return; for (i = 0; i < count; i++) { if (!device_is_attached(children[i])) continue; if (strcmp("pcib0", device_get_nameunit(children[i]))) continue; pcib0 = children[i]; break; } if (pcib0) vmbus_get_crs(pcib0, dev, pass); free(children, M_TEMP); } static void vmbus_get_mmio_res(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); /* * We walk the resources twice to make sure that: in the resource * list, the 32-bit resources appear behind the 64-bit resources. * NB: resource_list_add() uses INSERT_TAIL. This way, when we * iterate through the list to find a range for a 64-bit BAR in * vmbus_alloc_resource(), we can make sure we try to use >4GB * ranges first. */ pcib_host_res_init(dev, &sc->vmbus_mmio_res); vmbus_get_mmio_res_pass(dev, parse_64); vmbus_get_mmio_res_pass(dev, parse_32); } static void vmbus_free_mmio_res(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); pcib_host_res_free(dev, &sc->vmbus_mmio_res); } #endif /* NEW_PCIB */ static int vmbus_probe(device_t dev) { char *id[] = { "VMBUS", NULL }; if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL || device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV || (hyperv_features & CPUID_HV_MSR_SYNIC) == 0) return (ENXIO); device_set_desc(dev, "Hyper-V Vmbus"); return (BUS_PROBE_DEFAULT); } /** * @brief Main vmbus driver initialization routine. 
* * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_doattach(struct vmbus_softc *sc) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int ret; if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED) return (0); #ifdef NEW_PCIB vmbus_get_mmio_res(sc->vmbus_dev); #endif sc->vmbus_flags |= VMBUS_FLAG_ATTACHED; sc->vmbus_gpadl = VMBUS_GPADL_START; mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF); TAILQ_INIT(&sc->vmbus_prichans); mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF); TAILQ_INIT(&sc->vmbus_chans); sc->vmbus_chmap = malloc( sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF, M_WAITOK | M_ZERO); /* * Create context for "post message" Hypercalls */ sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE, sizeof(struct vmbus_msghc)); if (sc->vmbus_xc == NULL) { ret = ENXIO; goto cleanup; } /* * Allocate DMA stuffs. */ ret = vmbus_dma_alloc(sc); if (ret != 0) goto cleanup; /* * Setup interrupt. */ ret = vmbus_intr_setup(sc); if (ret != 0) goto cleanup; /* * Setup SynIC. */ if (bootverbose) device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started); smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc); sc->vmbus_flags |= VMBUS_FLAG_SYNIC; /* * Initialize vmbus, e.g. connect to Hypervisor. */ ret = vmbus_init(sc); if (ret != 0) goto cleanup; if (sc->vmbus_version == VMBUS_VERSION_WS2008 || sc->vmbus_version == VMBUS_VERSION_WIN7) sc->vmbus_event_proc = vmbus_event_proc_compat; else sc->vmbus_event_proc = vmbus_event_proc; ret = vmbus_scan(sc); if (ret != 0) goto cleanup; ctx = device_get_sysctl_ctx(sc->vmbus_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev)); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, vmbus_sysctl_version, "A", "vmbus version"); return (ret); cleanup: vmbus_scan_teardown(sc); vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_xc != NULL) { vmbus_xact_ctx_destroy(sc->vmbus_xc); sc->vmbus_xc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_prichan_lock); mtx_destroy(&sc->vmbus_chan_lock); return (ret); } static void vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused) { } static int vmbus_attach(device_t dev) { vmbus_sc = device_get_softc(dev); vmbus_sc->vmbus_dev = dev; vmbus_sc->vmbus_idtvec = -1; /* * Event processing logic will be configured: * - After the vmbus protocol version negotiation. * - Before we request channel offers. */ vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy; #ifndef EARLY_AP_STARTUP /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * initialization directly. 
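 *
 * (When cold, the attach is instead picked up by the
 * SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, ...) hook
 * below: vmbus_doattach() smp_rendezvous()es every CPU for SynIC
 * setup, which presumably is why it must wait until the APs are
 * up and thread scheduling works.)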
*/ if (!cold) #endif vmbus_doattach(vmbus_sc); return (0); } static int vmbus_detach(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); bus_generic_detach(dev); vmbus_chan_destroy_all(sc); vmbus_scan_teardown(sc); vmbus_disconnect(sc); if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) { sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC; smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL); } vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_xc != NULL) { vmbus_xact_ctx_destroy(sc->vmbus_xc); sc->vmbus_xc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_prichan_lock); mtx_destroy(&sc->vmbus_chan_lock); #ifdef NEW_PCIB vmbus_free_mmio_res(dev); #endif return (0); } #ifndef EARLY_AP_STARTUP static void vmbus_sysinit(void *arg __unused) { struct vmbus_softc *sc = vmbus_get_softc(); if (vm_guest != VM_GUEST_HV || sc == NULL) return; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_doattach(sc); } /* * NOTE: * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is * initialized. */ SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL); #endif /* !EARLY_AP_STARTUP */ Index: projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_chan.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 309263) @@ -1,1923 +1,2001 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void vmbus_chan_update_evtflagcnt( struct vmbus_softc *, const struct vmbus_channel *); static int vmbus_chan_close_internal( struct vmbus_channel *); static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS); static void vmbus_chan_sysctl_create( struct vmbus_channel *); static struct vmbus_channel *vmbus_chan_alloc(struct vmbus_softc *); static void vmbus_chan_free(struct vmbus_channel *); static int vmbus_chan_add(struct vmbus_channel *); static void vmbus_chan_cpu_default(struct vmbus_channel *); static int vmbus_chan_release(struct vmbus_channel *); static void vmbus_chan_set_chmap(struct vmbus_channel *); static void vmbus_chan_clear_chmap(struct vmbus_channel *); static void vmbus_chan_detach(struct vmbus_channel *); static bool vmbus_chan_wait_revoke( const struct vmbus_channel *); static void vmbus_chan_ins_prilist(struct vmbus_softc *, struct vmbus_channel *); static void vmbus_chan_rem_prilist(struct vmbus_softc *, struct vmbus_channel *); static void vmbus_chan_ins_list(struct vmbus_softc *, struct vmbus_channel *); static void vmbus_chan_rem_list(struct vmbus_softc *, struct vmbus_channel *); static void vmbus_chan_ins_sublist(struct vmbus_channel *, struct vmbus_channel *); static void vmbus_chan_rem_sublist(struct vmbus_channel *, struct vmbus_channel *); static void vmbus_chan_task(void *, int); static void vmbus_chan_task_nobatch(void *, int); static void vmbus_chan_clrchmap_task(void *, int); static void vmbus_prichan_attach_task(void *, int); static void vmbus_subchan_attach_task(void *, int); static void vmbus_prichan_detach_task(void *, int); static void vmbus_subchan_detach_task(void *, int); static void vmbus_chan_msgproc_choffer(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chan_msgproc_chrescind( struct vmbus_softc *, const struct vmbus_message *); static int vmbus_chan_printf(const struct vmbus_channel *, const char *, ...) __printflike(2, 3); /* * Vmbus channel message processing. */ static const vmbus_chanmsg_proc_t vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER, vmbus_chan_msgproc_choffer), VMBUS_CHANMSG_PROC(CHRESCIND, vmbus_chan_msgproc_chrescind), VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP) }; /* * Notify host that there are data pending on our TX bufring. 
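 *
 * Two signalling paths exist, chosen per channel in
 * vmbus_chan_signal_tx() below after the event flag itself is set:
 *
 *	if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF)
 *		atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask);
 *	else
 *		hypercall_signal_event(chan->ch_monprm_dma.hv_paddr);
 *
 * i.e. channels with the monitor notification facility just flip a
 * bit in a shared page the host polls, while the rest pay for an
 * immediate hypercall.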
*/ static __inline void vmbus_chan_signal_tx(const struct vmbus_channel *chan) { atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask); if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask); else hypercall_signal_event(chan->ch_monprm_dma.hv_paddr); } static void vmbus_chan_ins_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan) { mtx_assert(&sc->vmbus_prichan_lock, MA_OWNED); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONPRIL_SHIFT)) panic("channel is already on the prilist"); TAILQ_INSERT_TAIL(&sc->vmbus_prichans, chan, ch_prilink); } static void vmbus_chan_rem_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan) { mtx_assert(&sc->vmbus_prichan_lock, MA_OWNED); if (atomic_testandclear_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONPRIL_SHIFT) == 0) panic("channel is not on the prilist"); TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink); } static void vmbus_chan_ins_sublist(struct vmbus_channel *prichan, struct vmbus_channel *chan) { mtx_assert(&prichan->ch_subchan_lock, MA_OWNED); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONSUBL_SHIFT)) panic("channel is already on the sublist"); TAILQ_INSERT_TAIL(&prichan->ch_subchans, chan, ch_sublink); /* Bump sub-channel count. */ prichan->ch_subchan_cnt++; } static void vmbus_chan_rem_sublist(struct vmbus_channel *prichan, struct vmbus_channel *chan) { mtx_assert(&prichan->ch_subchan_lock, MA_OWNED); KASSERT(prichan->ch_subchan_cnt > 0, ("invalid subchan_cnt %d", prichan->ch_subchan_cnt)); prichan->ch_subchan_cnt--; if (atomic_testandclear_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONSUBL_SHIFT) == 0) panic("channel is not on the sublist"); TAILQ_REMOVE(&prichan->ch_subchans, chan, ch_sublink); } static void vmbus_chan_ins_list(struct vmbus_softc *sc, struct vmbus_channel *chan) { mtx_assert(&sc->vmbus_chan_lock, MA_OWNED); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONLIST_SHIFT)) panic("channel is already on the list"); TAILQ_INSERT_TAIL(&sc->vmbus_chans, chan, ch_link); } static void vmbus_chan_rem_list(struct vmbus_softc *sc, struct vmbus_channel *chan) { mtx_assert(&sc->vmbus_chan_lock, MA_OWNED); if (atomic_testandclear_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONLIST_SHIFT) == 0) panic("channel is not on the list"); TAILQ_REMOVE(&sc->vmbus_chans, chan, ch_link); } static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS) { struct vmbus_channel *chan = arg1; int mnf = 0; if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) mnf = 1; return sysctl_handle_int(oidp, &mnf, 0, req); } static void vmbus_chan_sysctl_create(struct vmbus_channel *chan) { struct sysctl_oid *ch_tree, *chid_tree, *br_tree; struct sysctl_ctx_list *ctx; uint32_t ch_id; char name[16]; /* * Add sysctl nodes related to this channel to this * channel's sysctl ctx, so that they can be destroyed * independently upon close of this channel, which can * happen even if the device is not detached. */ ctx = &chan->ch_sysctl_ctx; sysctl_ctx_init(ctx); /* * Create dev.NAME.UNIT.channel tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)), OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID tree. 
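 *
 * The resulting OIDs look like, taking hn0 as an example:
 *
 *	dev.hn.0.channel.0.cpu
 *	dev.hn.0.channel.0.mnf
 *	dev.hn.0.channel.0.br.rx...
 *	dev.hn.0.channel.0.sub.1.chanid
 *
 * with sub-channels hanging off their primary channel's CHANID
 * node under "sub", keyed by sub-channel index.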
*/ if (VMBUS_CHAN_ISPRIMARY(chan)) ch_id = chan->ch_id; else ch_id = chan->ch_prichan->ch_id; snprintf(name, sizeof(name), "%d", ch_id); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Create dev.NAME.UNIT.channel.CHANID.sub tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree. * * NOTE: * chid_tree is changed to this new sysctl tree. */ snprintf(name, sizeof(name), "%d", chan->ch_subidx); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id"); } SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, chan, 0, vmbus_chan_sysctl_mnf, "I", "has monitor notification facilities"); br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "br", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (br_tree != NULL) { /* * Create sysctl tree for RX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_rxbr.rxbr, "rx"); /* * Create sysctl tree for TX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_txbr.txbr, "tx"); } } int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_chan_br cbr; int error; /* * Allocate the TX+RX bufrings. */ KASSERT(chan->ch_bufring == NULL, ("bufrings are allocated")); chan->ch_bufring = hyperv_dmamem_alloc(bus_get_dma_tag(chan->ch_dev), PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma, BUS_DMA_WAITOK); if (chan->ch_bufring == NULL) { vmbus_chan_printf(chan, "bufring allocation failed\n"); return (ENOMEM); } cbr.cbr = chan->ch_bufring; cbr.cbr_paddr = chan->ch_bufring_dma.hv_paddr; cbr.cbr_txsz = txbr_size; cbr.cbr_rxsz = rxbr_size; error = vmbus_chan_open_br(chan, &cbr, udata, udlen, cb, cbarg); if (error) { if (error == EISCONN) { /* * XXX * The bufring GPADL is still connected; abandon * this bufring, instead of having mysterious * crash or trashed data later on. 
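 *
 * (While the GPADL is connected the host may still DMA into these
 * pages; freeing them would let the allocator hand host-writable
 * memory to an unrelated consumer. Leaking txbr_size + rxbr_size
 * bytes is the lesser evil.)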
*/ vmbus_chan_printf(chan, "chan%u bufring GPADL " "is still connected upon channel open error; " "leak %d bytes memory\n", chan->ch_id, txbr_size + rxbr_size); } else { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); } chan->ch_bufring = NULL; } return (error); } int vmbus_chan_open_br(struct vmbus_channel *chan, const struct vmbus_chan_br *cbr, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_softc *sc = chan->ch_vmbus; - const struct vmbus_chanmsg_chopen_resp *resp; const struct vmbus_message *msg; struct vmbus_chanmsg_chopen *req; struct vmbus_msghc *mh; uint32_t status; int error, txbr_size, rxbr_size; task_fn_t *task_fn; uint8_t *br; if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) { vmbus_chan_printf(chan, "invalid udata len %d for chan%u\n", udlen, chan->ch_id); return (EINVAL); } br = cbr->cbr; txbr_size = cbr->cbr_txsz; rxbr_size = cbr->cbr_rxsz; KASSERT((txbr_size & PAGE_MASK) == 0, ("send bufring size is not multiple page")); KASSERT((rxbr_size & PAGE_MASK) == 0, ("recv bufring size is not multiple page")); KASSERT((cbr->cbr_paddr & PAGE_MASK) == 0, ("bufring is not page aligned")); /* * Zero out the TX/RX bufrings, in case that they were used before. */ memset(br, 0, txbr_size + rxbr_size); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED_SHIFT)) panic("double-open chan%u", chan->ch_id); chan->ch_cb = cb; chan->ch_cbarg = cbarg; vmbus_chan_update_evtflagcnt(sc, chan); chan->ch_tq = VMBUS_PCPU_GET(chan->ch_vmbus, event_tq, chan->ch_cpuid); if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) task_fn = vmbus_chan_task; else task_fn = vmbus_chan_task_nobatch; TASK_INIT(&chan->ch_task, 0, task_fn, chan); /* TX bufring comes first */ vmbus_txbr_setup(&chan->ch_txbr, br, txbr_size); /* RX bufring immediately follows TX bufring */ vmbus_rxbr_setup(&chan->ch_rxbr, br + txbr_size, rxbr_size); /* Create sysctl tree for this channel */ vmbus_chan_sysctl_create(chan); /* * Connect the bufrings, both RX and TX, to this channel. */ KASSERT(chan->ch_bufring_gpadl == 0, ("bufring GPADL is still connected")); error = vmbus_chan_gpadl_connect(chan, cbr->cbr_paddr, txbr_size + rxbr_size, &chan->ch_bufring_gpadl); if (error) { vmbus_chan_printf(chan, "failed to connect bufring GPADL to chan%u\n", chan->ch_id); goto failed; } /* * Install this channel, before it is opened, but after everything * else has been setup. */ vmbus_chan_set_chmap(chan); /* * Open channel w/ the bufring GPADL on the target CPU. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for chopen(chan%u)\n", chan->ch_id); error = ENXIO; goto failed; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN; req->chm_chanid = chan->ch_id; req->chm_openid = chan->ch_id; req->chm_gpadl = chan->ch_bufring_gpadl; req->chm_vcpuid = chan->ch_vcpuid; req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT; if (udlen > 0) memcpy(req->chm_udata, udata, udlen); error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_chan_printf(chan, "chopen(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); goto failed; } - msg = vmbus_msghc_wait_result(sc, mh); - resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data; - status = resp->chm_status; + for (;;) { + msg = vmbus_msghc_poll_result(sc, mh); + if (msg != NULL) + break; + if (vmbus_chan_is_revoked(chan)) { + int i; + /* + * NOTE: + * Hypervisor does _not_ send response CHOPEN to + * a revoked channel. 
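+ *
+ * (This is why the code polls with vmbus_msghc_poll_result()
+ * instead of blocking in vmbus_msghc_wait_result(), which would
+ * never return here. The linger below re-polls for
+ * REVOKE_LINGER x 1ms = 100ms before cancelling the hypercall
+ * transaction.)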
+ */ + vmbus_chan_printf(chan, + "chan%u is revoked, when it is being opened\n", + chan->ch_id); + + /* + * XXX + * Add extra delay before cancel the hypercall + * execution; mainly to close any possible + * CHRESCIND and CHOPEN_RESP races on the + * hypervisor side. + */ +#define REVOKE_LINGER 100 + for (i = 0; i < REVOKE_LINGER; ++i) { + msg = vmbus_msghc_poll_result(sc, mh); + if (msg != NULL) + break; + DELAY(1000); + } +#undef REVOKE_LINGER + if (msg == NULL) + vmbus_msghc_exec_cancel(sc, mh); + break; + } + DELAY(1000); + } + if (msg != NULL) { + status = ((const struct vmbus_chanmsg_chopen_resp *) + msg->msg_data)->chm_status; + } else { + /* XXX any non-0 value is ok here. */ + status = 0xff; + } + vmbus_msghc_put(sc, mh); if (status == 0) { if (bootverbose) vmbus_chan_printf(chan, "chan%u opened\n", chan->ch_id); return (0); } vmbus_chan_printf(chan, "failed to open chan%u\n", chan->ch_id); error = ENXIO; failed: sysctl_ctx_free(&chan->ch_sysctl_ctx); vmbus_chan_clear_chmap(chan); if (chan->ch_bufring_gpadl != 0) { int error1; error1 = vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); if (error1) { /* * Give caller a hint that the bufring GPADL is still * connected. */ error = EISCONN; } chan->ch_bufring_gpadl = 0; } atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); return (error); } int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl0) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_conn *req; const struct vmbus_message *msg; size_t reqsz; uint32_t gpadl, status; int page_count, range_len, i, cnt, error; uint64_t page_id; + KASSERT(*gpadl0 == 0, ("GPADL is not zero")); + /* * Preliminary checks. */ KASSERT((size & PAGE_MASK) == 0, ("invalid GPA size %d, not multiple page size", size)); page_count = size >> PAGE_SHIFT; KASSERT((paddr & PAGE_MASK) == 0, ("GPA is not page aligned %jx", (uintmax_t)paddr)); page_id = paddr >> PAGE_SHIFT; range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]); /* * We don't support multiple GPA ranges. */ if (range_len > UINT16_MAX) { vmbus_chan_printf(chan, "GPA too large, %d pages\n", page_count); return EOPNOTSUPP; } /* * Allocate GPADL id. */ gpadl = vmbus_gpadl_alloc(sc); /* * Connect this GPADL to the target channel. * * NOTE: * Since each message can only hold small set of page * addresses, several messages may be required to * complete the connection. 
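 *
 * In sketch form:
 *
 *	cnt = min(page_count, VMBUS_CHANMSG_GPADL_CONN_PGMAX);
 *	page_count -= cnt;		// GPADL_CONN carries cnt PFNs
 *	while (page_count > 0) {
 *		cnt = min(page_count,
 *		    VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX);
 *		page_count -= cnt;	// GPADL_SUBCONN, no result
 *	}
 *	wait for the single GPADL_CONNRESP at the end.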
*/ if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn, chm_range.gpa_page[cnt]); mh = vmbus_msghc_get(sc, reqsz); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for gpadl_conn(chan%u)\n", chan->ch_id); return EIO; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; req->chm_range_len = range_len; req->chm_range_cnt = 1; req->chm_range.gpa_len = size; req->chm_range.gpa_ofs = 0; for (i = 0; i < cnt; ++i) req->chm_range.gpa_page[i] = page_id++; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_chan_printf(chan, "gpadl_conn(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } while (page_count > 0) { struct vmbus_chanmsg_gpadl_subconn *subreq; if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn, chm_gpa_page[cnt]); vmbus_msghc_reset(mh, reqsz); subreq = vmbus_msghc_dataptr(mh); subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN; subreq->chm_gpadl = gpadl; for (i = 0; i < cnt; ++i) subreq->chm_gpa_page[i] = page_id++; vmbus_msghc_exec_noresult(mh); } KASSERT(page_count == 0, ("invalid page count %d", page_count)); msg = vmbus_msghc_wait_result(sc, mh); status = ((const struct vmbus_chanmsg_gpadl_connresp *) msg->msg_data)->chm_status; vmbus_msghc_put(sc, mh); if (status != 0) { vmbus_chan_printf(chan, "gpadl_conn(chan%u) failed: %u\n", chan->ch_id, status); return EIO; } /* Done; commit the GPADL id. */ *gpadl0 = gpadl; if (bootverbose) { vmbus_chan_printf(chan, "gpadl_conn(chan%u) succeeded\n", chan->ch_id); } return 0; } static bool vmbus_chan_wait_revoke(const struct vmbus_channel *chan) { #define WAIT_COUNT 200 /* 200ms */ int i; for (i = 0; i < WAIT_COUNT; ++i) { if (vmbus_chan_is_revoked(chan)) return (true); /* Not sure about the context; use busy-wait. */ DELAY(1000); } return (false); #undef WAIT_COUNT } /* * Disconnect the GPA from the target channel */ int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_disconn *req; int error; + KASSERT(gpadl != 0, ("GPADL is zero")); + mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for gpadl_disconn(chan%u)\n", chan->ch_id); return (EBUSY); } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_msghc_put(sc, mh); if (vmbus_chan_wait_revoke(chan)) { /* * Error is benign; this channel is revoked, * so this GPADL will not be touched anymore. 
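 *
 * (vmbus_chan_wait_revoke() above polls for up to 200 x 1ms =
 * 200ms; once the CHRESCIND has arrived the host side has
 * presumably dropped the mapping already, so the failed DISCONN
 * can be ignored.)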
*/ vmbus_chan_printf(chan, "gpadl_disconn(revoked chan%u) msg hypercall " "exec failed: %d\n", chan->ch_id, error); return (0); } vmbus_chan_printf(chan, "gpadl_disconn(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); return (error); } vmbus_msghc_wait_result(sc, mh); /* Discard result; no useful information */ vmbus_msghc_put(sc, mh); return (0); } static void vmbus_chan_detach(struct vmbus_channel *chan) { int refs; KASSERT(chan->ch_refs > 0, ("chan%u: invalid refcnt %d", chan->ch_id, chan->ch_refs)); refs = atomic_fetchadd_int(&chan->ch_refs, -1); #ifdef INVARIANTS if (VMBUS_CHAN_ISPRIMARY(chan)) { KASSERT(refs == 1, ("chan%u: invalid refcnt %d for prichan", chan->ch_id, refs + 1)); } #endif if (refs == 1) { /* * Detach the target channel. */ if (bootverbose) { vmbus_chan_printf(chan, "chan%u detached\n", chan->ch_id); } taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_detach_task); } } static void vmbus_chan_clrchmap_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; critical_enter(); chan->ch_vmbus->vmbus_chmap[chan->ch_id] = NULL; critical_exit(); } static void vmbus_chan_clear_chmap(struct vmbus_channel *chan) { struct task chmap_task; TASK_INIT(&chmap_task, 0, vmbus_chan_clrchmap_task, chan); taskqueue_enqueue(chan->ch_tq, &chmap_task); taskqueue_drain(chan->ch_tq, &chmap_task); } static void vmbus_chan_set_chmap(struct vmbus_channel *chan) { __compiler_membar(); chan->ch_vmbus->vmbus_chmap[chan->ch_id] = chan; } static int vmbus_chan_close_internal(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_chclose *req; uint32_t old_stflags; int error; /* * NOTE: * Sub-channels are closed upon their primary channel closing, * so they can be closed even before they are opened. */ for (;;) { old_stflags = chan->ch_stflags; if (atomic_cmpset_int(&chan->ch_stflags, old_stflags, old_stflags & ~VMBUS_CHAN_ST_OPENED)) break; } if ((old_stflags & VMBUS_CHAN_ST_OPENED) == 0) { /* Not opened yet; done */ if (bootverbose) { vmbus_chan_printf(chan, "chan%u not opened\n", chan->ch_id); } return (0); } /* * Free this channel's sysctl tree attached to its device's * sysctl tree. */ sysctl_ctx_free(&chan->ch_sysctl_ctx); /* * NOTE: * Order is critical. This channel _must_ be uninstalled first, * else the channel task may be enqueued by the IDT after it has * been drained. */ vmbus_chan_clear_chmap(chan); taskqueue_drain(chan->ch_tq, &chan->ch_task); chan->ch_tq = NULL; /* * Close this channel. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for chclose(chan%u)\n", chan->ch_id); error = ENXIO; goto disconnect; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { vmbus_chan_printf(chan, "chclose(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); goto disconnect; } if (bootverbose) vmbus_chan_printf(chan, "chan%u closed\n", chan->ch_id); disconnect: /* * Disconnect the TX+RX bufrings from this channel. */ if (chan->ch_bufring_gpadl != 0) { int error1; error1 = vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); if (error1) { /* * XXX * The bufring GPADL is still connected; abandon * this bufring, instead of having mysterious * crash or trashed data later on. 
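vmbus_chan_close_internal() above clears VMBUS_CHAN_ST_OPENED with an atomic_cmpset_int() loop, so close and open cannot both believe they own the channel. A rough userland analogue using C11 atomics (the flag value is made up for the sketch):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define CHAN_ST_OPENED	0x0002		/* illustrative flag bit */

/* Clear OPENED and report whether it was set, like the cmpset loop above. */
static bool
chan_clear_opened(atomic_uint *stflags)
{
	unsigned int old;

	do {
		old = atomic_load(stflags);
	} while (!atomic_compare_exchange_weak(stflags, &old,
	    old & ~CHAN_ST_OPENED));
	return ((old & CHAN_ST_OPENED) != 0);
}

int
main(void)
{
	atomic_uint st = CHAN_ST_OPENED;

	printf("was opened: %d\n", chan_clear_opened(&st));	/* 1 */
	printf("was opened: %d\n", chan_clear_opened(&st));	/* 0: close is idempotent */
	return (0);
}

The captured old value is what makes the "not opened yet; done" early return race-free.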
*/ vmbus_chan_printf(chan, "chan%u bufring GPADL " "is still connected after close\n", chan->ch_id); chan->ch_bufring = NULL; /* * Give caller a hint that the bufring GPADL is * still connected. */ error = EISCONN; } chan->ch_bufring_gpadl = 0; } /* * Destroy the TX+RX bufrings. */ if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } return (error); } int vmbus_chan_close_direct(struct vmbus_channel *chan) { int error; #ifdef INVARIANTS if (VMBUS_CHAN_ISPRIMARY(chan)) { struct vmbus_channel *subchan; /* * All sub-channels _must_ have been closed, or are _not_ * opened at all. */ mtx_lock(&chan->ch_subchan_lock); TAILQ_FOREACH(subchan, &chan->ch_subchans, ch_sublink) { KASSERT( (subchan->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0, ("chan%u: subchan%u is still opened", chan->ch_id, subchan->ch_subidx)); } mtx_unlock(&chan->ch_subchan_lock); } #endif error = vmbus_chan_close_internal(chan); if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * This sub-channel is referenced, when it is linked to * the primary channel; drop that reference now. */ vmbus_chan_detach(chan); } return (error); } /* * Caller should make sure that all sub-channels have * been added to 'chan' and all to-be-closed channels * are not being opened. */ void vmbus_chan_close(struct vmbus_channel *chan) { int subchan_cnt; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Sub-channel is closed when its primary channel * is closed; done. */ return; } /* * Close all sub-channels, if any. */ subchan_cnt = chan->ch_subchan_cnt; if (subchan_cnt > 0) { struct vmbus_channel **subchan; int i; subchan = vmbus_subchan_get(chan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) { vmbus_chan_close_internal(subchan[i]); /* * This sub-channel is referenced, when it is * linked to the primary channel; drop that * reference now. */ vmbus_chan_detach(subchan[i]); } vmbus_subchan_rel(subchan, subchan_cnt); } /* Then close the primary channel. 
*/ vmbus_chan_close_internal(chan); } void vmbus_chan_intr_drain(struct vmbus_channel *chan) { taskqueue_drain(chan->ch_tq, &chan->ch_task); } int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt pkt; int pktlen, pad_pktlen, hlen, error; uint64_t pad = 0; struct iovec iov[3]; boolean_t send_evt; hlen = sizeof(pkt); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = type; pkt.cp_hdr.cph_flags = flags; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 3, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_sglist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_gpa_cnt = sglen; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = sg; iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen; iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_prplist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; hlen = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prp_cnt]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_range_cnt = 1; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = prp; iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]); iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen0, uint64_t *xactid) { struct vmbus_chanpkt_hdr pkt; int error, dlen, hlen; error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) 
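vmbus_chan_send() and its sglist/prplist variants all follow the same shape: compute the padded total length, then hand the bufring a small iovec of header, payload, and zero padding so nothing is copied twice. A standalone sketch of that layout, assuming the 8-byte rounding that VMBUS_CHANPKT_TOTLEN() is taken to perform here; the struct below only mimics vmbus_chanpkt_hdr, and the real driver stores the length fields through VMBUS_CHANPKT_SETLEN():

#include <stdint.h>
#include <stdio.h>
#include <sys/uio.h>

#define CHANPKT_ALIGN		8	/* assumed alignment */
#define CHANPKT_TOTLEN(len)	(((len) + CHANPKT_ALIGN - 1) & ~(CHANPKT_ALIGN - 1))

struct chanpkt_hdr {			/* mimics struct vmbus_chanpkt_hdr */
	uint16_t	cph_type;
	uint16_t	cph_flags;
	uint16_t	cph_hlen;	/* stored in encoded units on real h/w */
	uint16_t	cph_tlen;	/* stored in encoded units on real h/w */
	uint64_t	cph_xactid;
};

int
main(void)
{
	struct chanpkt_hdr hdr = { .cph_type = 1, .cph_xactid = 42 };
	char data[13] = "hello, vmbus";
	uint64_t pad = 0;
	int hlen = sizeof(hdr), dlen = sizeof(data);
	int pktlen = hlen + dlen;
	int pad_pktlen = CHANPKT_TOTLEN(pktlen);

	hdr.cph_hlen = (uint16_t)hlen;		/* simplified encoding */
	hdr.cph_tlen = (uint16_t)pad_pktlen;

	/* Header, payload, and zero padding, like the 3-element iov above. */
	struct iovec iov[3] = {
		{ .iov_base = &hdr,  .iov_len = hlen },
		{ .iov_base = data,  .iov_len = dlen },
		{ .iov_base = &pad,  .iov_len = pad_pktlen - pktlen },
	};

	printf("pktlen %d -> padded %d (pad %zu bytes)\n",
	    pktlen, pad_pktlen, iov[2].iov_len);
	return (0);
}

Since the pad is always less than CHANPKT_ALIGN bytes, a single uint64_t of zeros is enough to back the third segment.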
return (error); if (__predict_false(pkt.cph_hlen < VMBUS_CHANPKT_HLEN_MIN)) { vmbus_chan_printf(chan, "invalid hlen %u\n", pkt.cph_hlen); /* XXX this channel is dead actually. */ return (EIO); } if (__predict_false(pkt.cph_hlen > pkt.cph_tlen)) { vmbus_chan_printf(chan, "invalid hlen %u and tlen %u\n", pkt.cph_hlen, pkt.cph_tlen); /* XXX this channel is dead actually. */ return (EIO); } hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen); dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen; if (*dlen0 < dlen) { /* Return the size of this packet's data. */ *dlen0 = dlen; return (ENOBUFS); } *xactid = pkt.cph_xactid; *dlen0 = dlen; /* Skip packet header */ error = vmbus_rxbr_read(&chan->ch_rxbr, data, dlen, hlen); KASSERT(!error, ("vmbus_rxbr_read failed")); return (0); } int vmbus_chan_recv_pkt(struct vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt, int *pktlen0) { int error, pktlen, pkt_hlen; pkt_hlen = sizeof(*pkt); error = vmbus_rxbr_peek(&chan->ch_rxbr, pkt, pkt_hlen); if (error) return (error); if (__predict_false(pkt->cph_hlen < VMBUS_CHANPKT_HLEN_MIN)) { vmbus_chan_printf(chan, "invalid hlen %u\n", pkt->cph_hlen); /* XXX this channel is dead actually. */ return (EIO); } if (__predict_false(pkt->cph_hlen > pkt->cph_tlen)) { vmbus_chan_printf(chan, "invalid hlen %u and tlen %u\n", pkt->cph_hlen, pkt->cph_tlen); /* XXX this channel is dead actually. */ return (EIO); } pktlen = VMBUS_CHANPKT_GETLEN(pkt->cph_tlen); if (*pktlen0 < pktlen) { /* Return the size of this packet. */ *pktlen0 = pktlen; return (ENOBUFS); } *pktlen0 = pktlen; /* * Skip the fixed-size packet header, which has been filled * by the above vmbus_rxbr_peek(). */ error = vmbus_rxbr_read(&chan->ch_rxbr, pkt + 1, pktlen - pkt_hlen, pkt_hlen); KASSERT(!error, ("vmbus_rxbr_read failed")); return (0); } static void vmbus_chan_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; vmbus_chan_callback_t cb = chan->ch_cb; void *cbarg = chan->ch_cbarg; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. * * NOTE: Interrupt has been disabled in the ISR. */ for (;;) { uint32_t left; cb(chan, cbarg); left = vmbus_rxbr_intr_unmask(&chan->ch_rxbr); if (left == 0) { /* No more data in RX bufring; done */ break; } vmbus_rxbr_intr_mask(&chan->ch_rxbr); } } static void vmbus_chan_task_nobatch(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; chan->ch_cb(chan, chan->ch_cbarg); } static __inline void vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags, int flag_cnt) { int f; for (f = 0; f < flag_cnt; ++f) { uint32_t chid_base; u_long flags; int chid_ofs; if (event_flags[f] == 0) continue; flags = atomic_swap_long(&event_flags[f], 0); chid_base = f << VMBUS_EVTFLAG_SHIFT; while ((chid_ofs = ffsl(flags)) != 0) { struct vmbus_channel *chan; --chid_ofs; /* NOTE: ffsl is 1-based */ flags &= ~(1UL << chid_ofs); chan = sc->vmbus_chmap[chid_base + chid_ofs]; if (__predict_false(chan == NULL)) { /* Channel is closed. 
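The batched-read loop in vmbus_chan_task() closes the classic missed-interrupt window: data that arrives between the callback finishing and the interrupt being re-enabled is caught by re-checking after the unmask. A toy model of that control flow, with trivial stand-ins for the ring and for vmbus_rxbr_intr_{mask,unmask}():

#include <stdio.h>

static unsigned int ring_left = 300;	/* pretend bytes pending in the RX ring */

static void
chan_callback(void)
{
	/* Consume whatever is currently visible. */
	ring_left = (ring_left >= 100) ? ring_left - 100 : 0;
}

static unsigned int
rxbr_intr_unmask(void)
{
	/* Re-enable host signaling, then report bytes that raced in. */
	return (ring_left);
}

static void
rxbr_intr_mask(void)
{
	/* Disable host signaling while we read. */
}

int
main(void)
{
	/* Same shape as vmbus_chan_task(): read, unmask, re-check, repeat. */
	for (;;) {
		chan_callback();
		if (rxbr_intr_unmask() == 0)
			break;		/* RX bufring drained; stay unmasked */
		rxbr_intr_mask();	/* more arrived while unmasking; go again */
	}
	printf("done, ring_left=%u\n", ring_left);
	return (0);
}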
*/ continue; } __compiler_membar(); if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) vmbus_rxbr_intr_mask(&chan->ch_rxbr); taskqueue_enqueue(chan->ch_tq, &chan->ch_task); } } } void vmbus_event_proc(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; /* * On Host with Win8 or above, the event page can be checked directly * to get the id of the channel that has the pending interrupt. */ eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; vmbus_event_flags_proc(sc, eventf->evt_flags, VMBUS_PCPU_GET(sc, event_flags_cnt, cpu)); } void vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags, VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); } } static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc, const struct vmbus_channel *chan) { volatile int *flag_cnt_ptr; int flag_cnt; flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1; flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid); for (;;) { int old_flag_cnt; old_flag_cnt = *flag_cnt_ptr; if (old_flag_cnt >= flag_cnt) break; if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { if (bootverbose) { vmbus_chan_printf(chan, "chan%u update cpu%d flag_cnt to %d\n", chan->ch_id, chan->ch_cpuid, flag_cnt); } break; } } } static struct vmbus_channel * vmbus_chan_alloc(struct vmbus_softc *sc) { struct vmbus_channel *chan; chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO); chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param), &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (chan->ch_monprm == NULL) { device_printf(sc->vmbus_dev, "monprm alloc failed\n"); free(chan, M_DEVBUF); return NULL; } chan->ch_refs = 1; chan->ch_vmbus = sc; mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF); sx_init(&chan->ch_orphan_lock, "vmbus chorphan"); TAILQ_INIT(&chan->ch_subchans); vmbus_rxbr_init(&chan->ch_rxbr); vmbus_txbr_init(&chan->ch_txbr); return chan; } static void vmbus_chan_free(struct vmbus_channel *chan) { KASSERT(TAILQ_EMPTY(&chan->ch_subchans) && chan->ch_subchan_cnt == 0, ("still owns sub-channels")); KASSERT((chan->ch_stflags & (VMBUS_CHAN_ST_OPENED | VMBUS_CHAN_ST_ONPRIL | VMBUS_CHAN_ST_ONSUBL | VMBUS_CHAN_ST_ONLIST)) == 0, ("free busy channel")); KASSERT(chan->ch_orphan_xact == NULL, ("still has orphan xact installed")); KASSERT(chan->ch_refs == 0, ("chan%u: invalid refcnt %d", chan->ch_id, chan->ch_refs)); hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm); mtx_destroy(&chan->ch_subchan_lock); sx_destroy(&chan->ch_orphan_lock); vmbus_rxbr_deinit(&chan->ch_rxbr); vmbus_txbr_deinit(&chan->ch_txbr); free(chan, M_DEVBUF); } static int vmbus_chan_add(struct vmbus_channel *newchan) { struct vmbus_softc *sc = newchan->ch_vmbus; struct vmbus_channel *prichan; if (newchan->ch_id == 0) { /* * XXX * Chan0 will neither be processed nor should be offered; * skip it. */ device_printf(sc->vmbus_dev, "got chan0 offer, discard\n"); return EINVAL; } else if (newchan->ch_id >= VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid chan%u offer\n", newchan->ch_id); return EINVAL; } mtx_lock(&sc->vmbus_prichan_lock); TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) { /* * Sub-channel will have the same type GUID and instance * GUID as its primary channel. 
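vmbus_event_flags_proc() above turns each nonzero u_long of event flags into channel IDs with ffsl(). The same bit-walk in a standalone program; ffsl() is the one-based bit scan declared in <strings.h> on FreeBSD, and 64 channels per word is an LP64 assumption:

#include <stdio.h>
#include <strings.h>		/* ffsl() */

#define EVTFLAG_SHIFT	6	/* 64 channel bits per u_long, assumed LP64 */

int
main(void)
{
	unsigned long event_flags[2] = { 0x5UL, 0x8000000000000000UL };
	int f;

	for (f = 0; f < 2; f++) {
		/* The driver atomically swaps the live word to 0 first. */
		unsigned long flags = event_flags[f];
		unsigned int chid_base = f << EVTFLAG_SHIFT;
		int chid_ofs;

		while ((chid_ofs = ffsl((long)flags)) != 0) {
			--chid_ofs;			/* ffsl() is 1-based */
			flags &= ~(1UL << chid_ofs);
			printf("pending chan%u\n", chid_base + chid_ofs);
		}
	}
	return (0);
}

This prints chan0, chan2, and chan127, matching how a set bit maps to chid_base + chid_ofs in the interrupt path.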
*/ if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type, sizeof(struct hyperv_guid)) == 0 && memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst, sizeof(struct hyperv_guid)) == 0) break; } if (VMBUS_CHAN_ISPRIMARY(newchan)) { if (prichan == NULL) { /* Install the new primary channel */ vmbus_chan_ins_prilist(sc, newchan); mtx_unlock(&sc->vmbus_prichan_lock); goto done; } else { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "duplicated primary chan%u\n", newchan->ch_id); return EINVAL; } } else { /* Sub-channel */ if (prichan == NULL) { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "no primary chan for chan%u\n", newchan->ch_id); return EINVAL; } /* * Found the primary channel for this sub-channel and * move on. * * XXX refcnt prichan */ } mtx_unlock(&sc->vmbus_prichan_lock); /* * This is a sub-channel; link it with the primary channel. */ KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan), ("new channel is not sub-channel")); KASSERT(prichan != NULL, ("no primary channel")); /* * Reference count this sub-channel; it will be dereferenced * when this sub-channel is closed. */ KASSERT(newchan->ch_refs == 1, ("chan%u: invalid refcnt %d", newchan->ch_id, newchan->ch_refs)); atomic_add_int(&newchan->ch_refs, 1); newchan->ch_prichan = prichan; newchan->ch_dev = prichan->ch_dev; mtx_lock(&prichan->ch_subchan_lock); vmbus_chan_ins_sublist(prichan, newchan); mtx_unlock(&prichan->ch_subchan_lock); /* * Notify anyone that is interested in this sub-channel, * after this sub-channel is setup. */ wakeup(prichan); done: /* * Hook this channel up for later revocation. */ mtx_lock(&sc->vmbus_chan_lock); vmbus_chan_ins_list(sc, newchan); mtx_unlock(&sc->vmbus_chan_lock); if (bootverbose) { vmbus_chan_printf(newchan, "chan%u subidx%u offer\n", newchan->ch_id, newchan->ch_subidx); } /* Select default cpu for this channel. */ vmbus_chan_cpu_default(newchan); return 0; } void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); if (chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WS2008 || chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WIN7) { /* Only cpu0 is supported */ cpu = 0; } chan->ch_cpuid = cpu; chan->ch_vcpuid = VMBUS_PCPU_GET(chan->ch_vmbus, vcpuid, cpu); if (bootverbose) { vmbus_chan_printf(chan, "chan%u assigned to cpu%u [vcpu%u]\n", chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid); } } void vmbus_chan_cpu_rr(struct vmbus_channel *chan) { static uint32_t vmbus_chan_nextcpu; int cpu; cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus; vmbus_chan_cpu_set(chan, cpu); } static void vmbus_chan_cpu_default(struct vmbus_channel *chan) { /* * By default, pin the channel to cpu0. Devices having * special channel-cpu mapping requirement should call * vmbus_chan_cpu_{set,rr}(). 
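vmbus_chan_cpu_rr() above distributes channels over CPUs with a single atomic fetch-add on a static counter, which stays correct even when several offers are processed concurrently. A C11 sketch of the same idea:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint nextcpu;	/* plays the role of vmbus_chan_nextcpu */

static int
chan_cpu_rr(int ncpus)
{
	/* fetch-and-add lets concurrent channel offers pick distinct CPUs */
	return (atomic_fetch_add(&nextcpu, 1) % ncpus);
}

int
main(void)
{
	int i, ncpus = 4;

	for (i = 0; i < 6; i++)
		printf("chan%d -> cpu%d\n", i, chan_cpu_rr(ncpus));
	/* Pre-Win8 hosts (WS2008/WIN7) force cpu0 instead, as above. */
	return (0);
}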
*/ vmbus_chan_cpu_set(chan, 0); } static void vmbus_chan_msgproc_choffer(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_choffer *offer; struct vmbus_channel *chan; task_fn_t *detach_fn, *attach_fn; int error; offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data; chan = vmbus_chan_alloc(sc); if (chan == NULL) { device_printf(sc->vmbus_dev, "allocate chan%u failed\n", offer->chm_chanid); return; } chan->ch_id = offer->chm_chanid; chan->ch_subidx = offer->chm_subidx; chan->ch_guid_type = offer->chm_chtype; chan->ch_guid_inst = offer->chm_chinst; /* Batch reading is on by default */ chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT; if (sc->vmbus_version != VMBUS_VERSION_WS2008) chan->ch_monprm->mp_connid = offer->chm_connid; if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) { int trig_idx; /* * Setup MNF stuffs. */ chan->ch_txflags |= VMBUS_CHAN_TXF_HASMNF; trig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN; if (trig_idx >= VMBUS_MONTRIGS_MAX) panic("invalid monitor trigger %u", offer->chm_montrig); chan->ch_montrig = &sc->vmbus_mnf2->mnf_trigs[trig_idx].mt_pending; chan->ch_montrig_mask = 1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN); } /* * Setup event flag. */ chan->ch_evtflag = &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT]; chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK); /* * Setup attach and detach tasks. */ if (VMBUS_CHAN_ISPRIMARY(chan)) { chan->ch_mgmt_tq = sc->vmbus_devtq; attach_fn = vmbus_prichan_attach_task; detach_fn = vmbus_prichan_detach_task; } else { chan->ch_mgmt_tq = sc->vmbus_subchtq; attach_fn = vmbus_subchan_attach_task; detach_fn = vmbus_subchan_detach_task; } TASK_INIT(&chan->ch_attach_task, 0, attach_fn, chan); TASK_INIT(&chan->ch_detach_task, 0, detach_fn, chan); error = vmbus_chan_add(chan); if (error) { device_printf(sc->vmbus_dev, "add chan%u failed: %d\n", chan->ch_id, error); atomic_subtract_int(&chan->ch_refs, 1); vmbus_chan_free(chan); return; } taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_attach_task); } static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_chrescind *note; struct vmbus_channel *chan; note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data; if (note->chm_chanid > VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid revoked chan%u\n", note->chm_chanid); return; } /* * Find and remove the target channel from the channel list. */ mtx_lock(&sc->vmbus_chan_lock); TAILQ_FOREACH(chan, &sc->vmbus_chans, ch_link) { if (chan->ch_id == note->chm_chanid) break; } if (chan == NULL) { mtx_unlock(&sc->vmbus_chan_lock); device_printf(sc->vmbus_dev, "chan%u is not offered\n", note->chm_chanid); return; } vmbus_chan_rem_list(sc, chan); mtx_unlock(&sc->vmbus_chan_lock); if (VMBUS_CHAN_ISPRIMARY(chan)) { /* * The target channel is a primary channel; remove the * target channel from the primary channel list now, * instead of later, so that it will not be found by * other sub-channel offers, which are processed in * this thread. */ mtx_lock(&sc->vmbus_prichan_lock); vmbus_chan_rem_prilist(sc, chan); mtx_unlock(&sc->vmbus_prichan_lock); } /* * NOTE: * The following processing order is critical: * Set the REVOKED state flag before orphaning the installed xact. 
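The offer handler above derives each channel's event-flag slot from its ID alone: the word index is ch_id >> VMBUS_EVTFLAG_SHIFT and the bit is ch_id & VMBUS_EVTFLAG_MASK. The shift value below assumes a 64-bit u_long; the real constants live in the vmbus headers:

#include <stdio.h>

#define EVTFLAG_SHIFT	6			/* log2(bits per u_long), assumed */
#define EVTFLAG_MASK	((1 << EVTFLAG_SHIFT) - 1)

int
main(void)
{
	unsigned int chid;

	for (chid = 0; chid < 200; chid += 70) {
		unsigned int word = chid >> EVTFLAG_SHIFT;	   /* which u_long */
		unsigned long mask = 1UL << (chid & EVTFLAG_MASK); /* which bit */
		printf("chan%u -> tx_evtflags[%u], mask %#lx\n", chid, word, mask);
	}
	return (0);
}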
*/ if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_REVOKED_SHIFT)) panic("channel has already been revoked"); sx_xlock(&chan->ch_orphan_lock); if (chan->ch_orphan_xact != NULL) vmbus_xact_ctx_orphan(chan->ch_orphan_xact); sx_xunlock(&chan->ch_orphan_lock); if (bootverbose) vmbus_chan_printf(chan, "chan%u revoked\n", note->chm_chanid); vmbus_chan_detach(chan); } static int vmbus_chan_release(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_chanmsg_chfree *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for chfree(chan%u)\n", chan->ch_id); return (ENXIO); } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { vmbus_chan_printf(chan, "chfree(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); } else { if (bootverbose) vmbus_chan_printf(chan, "chan%u freed\n", chan->ch_id); } return (error); } static void vmbus_prichan_detach_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("chan%u is not primary channel", chan->ch_id)); /* Delete and detach the device associated with this channel. */ vmbus_delete_child(chan); /* Release this channel (back to vmbus). */ vmbus_chan_release(chan); /* Free this channel's resource. */ vmbus_chan_free(chan); } static void vmbus_subchan_detach_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; struct vmbus_channel *pri_chan = chan->ch_prichan; KASSERT(!VMBUS_CHAN_ISPRIMARY(chan), ("chan%u is primary channel", chan->ch_id)); /* Release this channel (back to vmbus). */ vmbus_chan_release(chan); /* Unlink from its primary channel's sub-channel list. */ mtx_lock(&pri_chan->ch_subchan_lock); vmbus_chan_rem_sublist(pri_chan, chan); mtx_unlock(&pri_chan->ch_subchan_lock); /* Notify anyone that is waiting for this sub-channel to vanish. */ wakeup(pri_chan); /* Free this channel's resource. */ vmbus_chan_free(chan); } static void vmbus_prichan_attach_task(void *xchan, int pending __unused) { /* * Add device for this primary channel. */ vmbus_add_child(xchan); } static void vmbus_subchan_attach_task(void *xchan __unused, int pending __unused) { /* Nothing */ } void vmbus_chan_destroy_all(struct vmbus_softc *sc) { /* * Detach all devices and destroy the corresponding primary * channels. */ for (;;) { struct vmbus_channel *chan; mtx_lock(&sc->vmbus_chan_lock); TAILQ_FOREACH(chan, &sc->vmbus_chans, ch_link) { if (VMBUS_CHAN_ISPRIMARY(chan)) break; } if (chan == NULL) { /* No more primary channels; done. 
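The rescind path above depends on ordering: the REVOKED bit must be set atomically before the pending xact is orphaned, and atomic_testandset_int() doubles as a double-revoke assertion. A userland analogue built on C11 atomic_fetch_or (the bit value is illustrative):

#include <stdatomic.h>
#include <stdio.h>

#define CHAN_ST_REVOKED	0x0010		/* illustrative bit */

/* Returns nonzero if the bit was already set, like atomic_testandset_int(). */
static int
chan_mark_revoked(atomic_uint *stflags)
{
	return ((atomic_fetch_or(stflags, CHAN_ST_REVOKED) &
	    CHAN_ST_REVOKED) != 0);
}

int
main(void)
{
	atomic_uint st = 0;

	if (chan_mark_revoked(&st))
		printf("panic: channel has already been revoked\n");
	/* Only after the flag is visible may the pending xact be orphaned. */
	if (chan_mark_revoked(&st))
		printf("second revoke detected, as expected\n");
	return (0);
}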
*/ mtx_unlock(&sc->vmbus_chan_lock); break; } vmbus_chan_rem_list(sc, chan); mtx_unlock(&sc->vmbus_chan_lock); mtx_lock(&sc->vmbus_prichan_lock); vmbus_chan_rem_prilist(sc, chan); mtx_unlock(&sc->vmbus_prichan_lock); taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_detach_task); } } struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt) { struct vmbus_channel **ret, *chan; int i; KASSERT(subchan_cnt > 0, ("invalid sub-channel count %d", subchan_cnt)); ret = malloc(subchan_cnt * sizeof(struct vmbus_channel *), M_TEMP, M_WAITOK); mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt < subchan_cnt) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0); i = 0; TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) { /* TODO: refcnt chan */ ret[i] = chan; ++i; if (i == subchan_cnt) break; } KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d", pri_chan->ch_subchan_cnt, subchan_cnt)); mtx_unlock(&pri_chan->ch_subchan_lock); return ret; } void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt __unused) { free(subchan, M_TEMP); } void vmbus_subchan_drain(struct vmbus_channel *pri_chan) { mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt > 0) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0); mtx_unlock(&pri_chan->ch_subchan_lock); } void vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX, ("invalid message type %u", msg_type)); msg_proc = vmbus_chan_msgprocs[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); } void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on) { if (!on) chan->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD; else chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; } uint32_t vmbus_chan_id(const struct vmbus_channel *chan) { return chan->ch_id; } uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan) { return chan->ch_subidx; } bool vmbus_chan_is_primary(const struct vmbus_channel *chan) { if (VMBUS_CHAN_ISPRIMARY(chan)) return true; else return false; } const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan) { return &chan->ch_guid_inst; } int vmbus_chan_prplist_nelem(int br_size, int prpcnt_max, int dlen_max) { int elem_size; elem_size = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prpcnt_max]); elem_size += dlen_max; elem_size = VMBUS_CHANPKT_TOTLEN(elem_size); return (vmbus_br_nelem(br_size, elem_size)); } bool vmbus_chan_tx_empty(const struct vmbus_channel *chan) { return (vmbus_txbr_empty(&chan->ch_txbr)); } bool vmbus_chan_rx_empty(const struct vmbus_channel *chan) { return (vmbus_rxbr_empty(&chan->ch_rxbr)); } static int vmbus_chan_printf(const struct vmbus_channel *chan, const char *fmt, ...) 
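vmbus_subchan_get() and vmbus_subchan_drain() above are both sleep-until-count loops on the primary channel's sub-channel lock, woken by wakeup(prichan) from the offer and detach paths. A pthread rendition of the same wait/wakeup pairing:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int subchan_cnt;

/* Mirrors vmbus_subchan_get(): sleep until enough sub-channels are offered. */
static void
subchan_wait(int want)
{
	pthread_mutex_lock(&lock);
	while (subchan_cnt < want)
		pthread_cond_wait(&cv, &lock);	/* mtx_sleep(pri_chan, ...) analogue */
	pthread_mutex_unlock(&lock);
}

static void *
offer_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	subchan_cnt++;
	pthread_mutex_unlock(&lock);
	pthread_cond_broadcast(&cv);		/* wakeup(prichan) analogue */
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, offer_thread, NULL);
	subchan_wait(1);
	printf("got %d sub-channel(s)\n", subchan_cnt);
	pthread_join(t, NULL);
	return (0);
}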
{
	va_list ap;
	device_t dev;
	int retval;

	if (chan->ch_dev == NULL || !device_is_alive(chan->ch_dev))
		dev = chan->ch_vmbus->vmbus_dev;
	else
		dev = chan->ch_dev;

	retval = device_print_prettyname(dev);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);

	return (retval);
}

void
vmbus_chan_run_task(struct vmbus_channel *chan, struct task *task)
{
	taskqueue_enqueue(chan->ch_tq, task);
	taskqueue_drain(chan->ch_tq, task);
}

struct taskqueue *
vmbus_chan_mgmt_tq(const struct vmbus_channel *chan)
{
	return (chan->ch_mgmt_tq);
}

bool
vmbus_chan_is_revoked(const struct vmbus_channel *chan)
{
	if (chan->ch_stflags & VMBUS_CHAN_ST_REVOKED)
		return (true);
	return (false);
}

void
vmbus_chan_set_orphan(struct vmbus_channel *chan, struct vmbus_xact_ctx *xact)
{
	sx_xlock(&chan->ch_orphan_lock);
	chan->ch_orphan_xact = xact;
	sx_xunlock(&chan->ch_orphan_lock);
}

void
vmbus_chan_unset_orphan(struct vmbus_channel *chan)
{
	sx_xlock(&chan->ch_orphan_lock);
	chan->ch_orphan_xact = NULL;
	sx_xunlock(&chan->ch_orphan_lock);
+}
+
+const void *
+vmbus_chan_xact_wait(const struct vmbus_channel *chan,
+    struct vmbus_xact *xact, size_t *resp_len, bool can_sleep)
+{
+	const void *ret;
+
+	if (can_sleep)
+		ret = vmbus_xact_wait(xact, resp_len);
+	else
+		ret = vmbus_xact_busywait(xact, resp_len);
+	if (vmbus_chan_is_revoked(chan)) {
+		/*
+		 * This xact has probably been interrupted, and the
+		 * interruption can race the reply reception, so we
+		 * have to make sure that nothing is left on the RX
+		 * bufring, i.e. this xact will not be touched once
+		 * this function returns.
+		 *
+		 * Since the hypervisor will not put more data onto
+		 * the RX bufring once the channel is revoked, the
+		 * following loop will terminate once all data have
+		 * been drained by the driver's channel callback.
+		 */
+		while (!vmbus_chan_rx_empty(chan)) {
+			if (can_sleep)
+				pause("chxact", 1);
+			else
+				DELAY(1000);
+		}
+	}
+	return (ret);
}
Index: projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_var.h
===================================================================
--- projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_var.h	(revision 309262)
+++ projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_var.h	(revision 309263)
@@ -1,167 +1,172 @@
/*-
 * Copyright (c) 2016 Microsoft Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
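The new vmbus_chan_xact_wait() above finishes with a drain loop: once the channel is revoked the hypervisor writes nothing more, so waiting for the RX bufring to empty guarantees the xact buffer is not referenced after return. A toy model of that tail, where the decrement stands in for the channel callback draining packets concurrently:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static int rx_pending = 3;	/* toy stand-in for RX bufring contents */

static bool
chan_rx_empty(void)
{
	/* Each poll "observes" the callback having drained one packet. */
	return (rx_pending-- <= 0);
}

int
main(void)
{
	bool can_sleep = true;

	/*
	 * Same shape as the tail of vmbus_chan_xact_wait(): after a
	 * revoke, sleep (or busy-wait) until the RX bufring is empty.
	 */
	while (!chan_rx_empty()) {
		if (can_sleep)
			usleep(1000);	/* pause("chxact", 1) analogue */
		else
			;		/* DELAY(1000) busy-wait analogue */
	}
	printf("xact reply is now safe to reuse\n");
	return (0);
}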
* * $FreeBSD$ */ #ifndef _VMBUS_VAR_H_ #define _VMBUS_VAR_H_ #include #include #include #include #include #include /* * NOTE: DO NOT CHANGE THIS. */ #define VMBUS_SINT_MESSAGE 2 /* * NOTE: * - DO NOT set it to the same value as VMBUS_SINT_MESSAGE. * - DO NOT set it to 0. */ #define VMBUS_SINT_TIMER 4 /* * NOTE: DO NOT CHANGE THESE */ #define VMBUS_CONNID_MESSAGE 1 #define VMBUS_CONNID_EVENT 2 struct vmbus_message; struct vmbus_softc; typedef void (*vmbus_chanmsg_proc_t)(struct vmbus_softc *, const struct vmbus_message *); #define VMBUS_CHANMSG_PROC(name, func) \ [VMBUS_CHANMSG_TYPE_##name] = func #define VMBUS_CHANMSG_PROC_WAKEUP(name) \ VMBUS_CHANMSG_PROC(name, vmbus_msghc_wakeup) struct vmbus_pcpu_data { u_long *intr_cnt; /* Hyper-V interrupt counter */ struct vmbus_message *message; /* shared messages */ uint32_t vcpuid; /* virtual cpuid */ int event_flags_cnt;/* # of event flags */ struct vmbus_evtflags *event_flags; /* event flags from host */ /* Rarely used fields */ struct hyperv_dma message_dma; /* busdma glue */ struct hyperv_dma event_flags_dma;/* busdma glue */ struct taskqueue *event_tq; /* event taskq */ struct taskqueue *message_tq; /* message taskq */ struct task message_task; /* message task */ } __aligned(CACHE_LINE_SIZE); #if __FreeBSD_version < 1100000 typedef u_long rman_res_t; #endif struct vmbus_softc { void (*vmbus_event_proc)(struct vmbus_softc *, int); u_long *vmbus_tx_evtflags; /* event flags to host */ struct vmbus_mnf *vmbus_mnf2; /* monitored by host */ u_long *vmbus_rx_evtflags; /* compat evtflgs from host */ struct vmbus_channel **vmbus_chmap; struct vmbus_xact_ctx *vmbus_xc; struct vmbus_pcpu_data vmbus_pcpu[MAXCPU]; /* * Rarely used fields */ device_t vmbus_dev; int vmbus_idtvec; uint32_t vmbus_flags; /* see VMBUS_FLAG_ */ uint32_t vmbus_version; uint32_t vmbus_gpadl; /* Shared memory for vmbus_{rx,tx}_evtflags */ void *vmbus_evtflags; struct hyperv_dma vmbus_evtflags_dma; void *vmbus_mnf1; /* monitored by VM, unused */ struct hyperv_dma vmbus_mnf1_dma; struct hyperv_dma vmbus_mnf2_dma; bool vmbus_scandone; struct task vmbus_scandone_task; struct taskqueue *vmbus_devtq; /* for dev attach/detach */ struct taskqueue *vmbus_subchtq; /* for sub-chan attach/detach */ /* Primary channels */ struct mtx vmbus_prichan_lock; TAILQ_HEAD(, vmbus_channel) vmbus_prichans; /* Complete channel list */ struct mtx vmbus_chan_lock; TAILQ_HEAD(, vmbus_channel) vmbus_chans; #ifdef NEW_PCIB /* The list of usable MMIO ranges for PCIe pass-through */ struct pcib_host_resources vmbus_mmio_res; #endif }; #define VMBUS_FLAG_ATTACHED 0x0001 /* vmbus was attached */ #define VMBUS_FLAG_SYNIC 0x0002 /* SynIC was setup */ #define VMBUS_PCPU_GET(sc, field, cpu) (sc)->vmbus_pcpu[(cpu)].field #define VMBUS_PCPU_PTR(sc, field, cpu) &(sc)->vmbus_pcpu[(cpu)].field struct vmbus_channel; struct trapframe; struct vmbus_message; struct vmbus_msghc; void vmbus_handle_intr(struct trapframe *); int vmbus_add_child(struct vmbus_channel *); int vmbus_delete_child(struct vmbus_channel *); void vmbus_et_intr(struct trapframe *); uint32_t vmbus_gpadl_alloc(struct vmbus_softc *); struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *, size_t); void vmbus_msghc_put(struct vmbus_softc *, struct vmbus_msghc *); void *vmbus_msghc_dataptr(struct vmbus_msghc *); int vmbus_msghc_exec_noresult(struct vmbus_msghc *); int vmbus_msghc_exec(struct vmbus_softc *, struct vmbus_msghc *); +void vmbus_msghc_exec_cancel(struct vmbus_softc *, + struct vmbus_msghc *); const struct vmbus_message * 
vmbus_msghc_wait_result(struct vmbus_softc *, + struct vmbus_msghc *); +const struct vmbus_message * + vmbus_msghc_poll_result(struct vmbus_softc *, struct vmbus_msghc *); void vmbus_msghc_wakeup(struct vmbus_softc *, const struct vmbus_message *); void vmbus_msghc_reset(struct vmbus_msghc *, size_t); #endif /* !_VMBUS_VAR_H_ */ Index: projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_xact.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_xact.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_xact.c (revision 309263) @@ -1,406 +1,442 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include struct vmbus_xact { struct vmbus_xact_ctx *x_ctx; void *x_priv; void *x_req; struct hyperv_dma x_req_dma; const void *x_resp; size_t x_resp_len; void *x_resp0; }; struct vmbus_xact_ctx { size_t xc_req_size; size_t xc_resp_size; size_t xc_priv_size; struct mtx xc_lock; /* * Protected by xc_lock. 
*/ uint32_t xc_flags; /* VMBUS_XACT_CTXF_ */ struct vmbus_xact *xc_free; struct vmbus_xact *xc_active; struct vmbus_xact *xc_orphan; }; #define VMBUS_XACT_CTXF_DESTROY 0x0001 static struct vmbus_xact *vmbus_xact_alloc(struct vmbus_xact_ctx *, bus_dma_tag_t); static void vmbus_xact_free(struct vmbus_xact *); static struct vmbus_xact *vmbus_xact_get1(struct vmbus_xact_ctx *, uint32_t); -const void *vmbus_xact_wait1(struct vmbus_xact *, size_t *, +static const void *vmbus_xact_wait1(struct vmbus_xact *, size_t *, bool); +static const void *vmbus_xact_return(struct vmbus_xact *, + size_t *); static void vmbus_xact_save_resp(struct vmbus_xact *, const void *, size_t); static void vmbus_xact_ctx_free(struct vmbus_xact_ctx *); static struct vmbus_xact * vmbus_xact_alloc(struct vmbus_xact_ctx *ctx, bus_dma_tag_t parent_dtag) { struct vmbus_xact *xact; xact = malloc(sizeof(*xact), M_DEVBUF, M_WAITOK | M_ZERO); xact->x_ctx = ctx; /* XXX assume that page aligned is enough */ xact->x_req = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, ctx->xc_req_size, &xact->x_req_dma, BUS_DMA_WAITOK); if (xact->x_req == NULL) { free(xact, M_DEVBUF); return (NULL); } if (ctx->xc_priv_size != 0) xact->x_priv = malloc(ctx->xc_priv_size, M_DEVBUF, M_WAITOK); xact->x_resp0 = malloc(ctx->xc_resp_size, M_DEVBUF, M_WAITOK); return (xact); } static void vmbus_xact_free(struct vmbus_xact *xact) { hyperv_dmamem_free(&xact->x_req_dma, xact->x_req); free(xact->x_resp0, M_DEVBUF); if (xact->x_priv != NULL) free(xact->x_priv, M_DEVBUF); free(xact, M_DEVBUF); } static struct vmbus_xact * vmbus_xact_get1(struct vmbus_xact_ctx *ctx, uint32_t dtor_flag) { struct vmbus_xact *xact; mtx_lock(&ctx->xc_lock); while ((ctx->xc_flags & dtor_flag) == 0 && ctx->xc_free == NULL) mtx_sleep(&ctx->xc_free, &ctx->xc_lock, 0, "gxact", 0); if (ctx->xc_flags & dtor_flag) { /* Being destroyed */ xact = NULL; } else { xact = ctx->xc_free; KASSERT(xact != NULL, ("no free xact")); KASSERT(xact->x_resp == NULL, ("xact has pending response")); ctx->xc_free = NULL; } mtx_unlock(&ctx->xc_lock); return (xact); } struct vmbus_xact_ctx * vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size, size_t priv_size) { struct vmbus_xact_ctx *ctx; KASSERT(req_size > 0, ("request size is 0")); KASSERT(resp_size > 0, ("response size is 0")); ctx = malloc(sizeof(*ctx), M_DEVBUF, M_WAITOK | M_ZERO); ctx->xc_req_size = req_size; ctx->xc_resp_size = resp_size; ctx->xc_priv_size = priv_size; ctx->xc_free = vmbus_xact_alloc(ctx, dtag); if (ctx->xc_free == NULL) { free(ctx, M_DEVBUF); return (NULL); } mtx_init(&ctx->xc_lock, "vmbus xact", NULL, MTX_DEF); return (ctx); } bool vmbus_xact_ctx_orphan(struct vmbus_xact_ctx *ctx) { mtx_lock(&ctx->xc_lock); if (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) { mtx_unlock(&ctx->xc_lock); return (false); } ctx->xc_flags |= VMBUS_XACT_CTXF_DESTROY; mtx_unlock(&ctx->xc_lock); wakeup(&ctx->xc_free); wakeup(&ctx->xc_active); ctx->xc_orphan = vmbus_xact_get1(ctx, 0); if (ctx->xc_orphan == NULL) panic("can't get xact"); return (true); } static void vmbus_xact_ctx_free(struct vmbus_xact_ctx *ctx) { KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY, ("xact ctx was not orphaned")); KASSERT(ctx->xc_orphan != NULL, ("no orphaned xact")); vmbus_xact_free(ctx->xc_orphan); mtx_destroy(&ctx->xc_lock); free(ctx, M_DEVBUF); } void vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx) { vmbus_xact_ctx_orphan(ctx); vmbus_xact_ctx_free(ctx); } struct vmbus_xact * vmbus_xact_get(struct vmbus_xact_ctx *ctx, size_t req_len) { struct vmbus_xact *xact; 
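vmbus_xact_get1() above hands out the context's single pre-allocated transaction, sleeping on xc_free until it is returned or the context is being destroyed. A pthread sketch of that one-slot handoff; the token pointer is a stand-in for the real struct vmbus_xact:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t xc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t xc_free_cv = PTHREAD_COND_INITIALIZER;
static void *xc_free = (void *)1;	/* toy token for the single xact */
static int xc_destroying;

static void *
xact_get(void)
{
	void *xact;

	pthread_mutex_lock(&xc_lock);
	while (!xc_destroying && xc_free == NULL)
		pthread_cond_wait(&xc_free_cv, &xc_lock);  /* "gxact" sleep */
	if (xc_destroying) {
		xact = NULL;		/* being destroyed */
	} else {
		xact = xc_free;
		xc_free = NULL;
	}
	pthread_mutex_unlock(&xc_lock);
	return (xact);
}

static void
xact_put(void *xact)
{
	pthread_mutex_lock(&xc_lock);
	xc_free = xact;
	pthread_mutex_unlock(&xc_lock);
	pthread_cond_signal(&xc_free_cv);	/* wakeup(&ctx->xc_free) */
}

int
main(void)
{
	void *x = xact_get();

	printf("got xact %p\n", x);
	xact_put(x);
	return (0);
}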
if (req_len > ctx->xc_req_size) panic("invalid request size %zu", req_len); xact = vmbus_xact_get1(ctx, VMBUS_XACT_CTXF_DESTROY); if (xact == NULL) return (NULL); memset(xact->x_req, 0, req_len); return (xact); } void vmbus_xact_put(struct vmbus_xact *xact) { struct vmbus_xact_ctx *ctx = xact->x_ctx; KASSERT(ctx->xc_active == NULL, ("pending active xact")); xact->x_resp = NULL; mtx_lock(&ctx->xc_lock); KASSERT(ctx->xc_free == NULL, ("has free xact")); ctx->xc_free = xact; mtx_unlock(&ctx->xc_lock); wakeup(&ctx->xc_free); } void * vmbus_xact_req_data(const struct vmbus_xact *xact) { return (xact->x_req); } bus_addr_t vmbus_xact_req_paddr(const struct vmbus_xact *xact) { return (xact->x_req_dma.hv_paddr); } void * vmbus_xact_priv(const struct vmbus_xact *xact, size_t priv_len) { if (priv_len > xact->x_ctx->xc_priv_size) panic("invalid priv size %zu", priv_len); return (xact->x_priv); } void vmbus_xact_activate(struct vmbus_xact *xact) { struct vmbus_xact_ctx *ctx = xact->x_ctx; KASSERT(xact->x_resp == NULL, ("xact has pending response")); mtx_lock(&ctx->xc_lock); KASSERT(ctx->xc_active == NULL, ("pending active xact")); ctx->xc_active = xact; mtx_unlock(&ctx->xc_lock); } void vmbus_xact_deactivate(struct vmbus_xact *xact) { struct vmbus_xact_ctx *ctx = xact->x_ctx; mtx_lock(&ctx->xc_lock); KASSERT(ctx->xc_active == xact, ("xact mismatch")); ctx->xc_active = NULL; mtx_unlock(&ctx->xc_lock); } -const void * -vmbus_xact_wait1(struct vmbus_xact *xact, size_t *resp_len, - bool can_sleep) +static const void * +vmbus_xact_return(struct vmbus_xact *xact, size_t *resp_len) { struct vmbus_xact_ctx *ctx = xact->x_ctx; const void *resp; - mtx_lock(&ctx->xc_lock); - - KASSERT(ctx->xc_active == xact, ("xact mismatch")); - while (xact->x_resp == NULL && - (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) == 0) { - if (can_sleep) { - mtx_sleep(&ctx->xc_active, &ctx->xc_lock, 0, - "wxact", 0); - } else { - mtx_unlock(&ctx->xc_lock); - DELAY(1000); - mtx_lock(&ctx->xc_lock); - } - } + mtx_assert(&ctx->xc_lock, MA_OWNED); KASSERT(ctx->xc_active == xact, ("xact trashed")); if ((ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) && xact->x_resp == NULL) { uint8_t b = 0; /* * Orphaned and no response was received yet; fake up * an one byte response. 
*/ printf("vmbus: xact ctx was orphaned w/ pending xact\n"); vmbus_xact_save_resp(ctx->xc_active, &b, sizeof(b)); } KASSERT(xact->x_resp != NULL, ("no response")); ctx->xc_active = NULL; resp = xact->x_resp; *resp_len = xact->x_resp_len; + return (resp); +} + +static const void * +vmbus_xact_wait1(struct vmbus_xact *xact, size_t *resp_len, + bool can_sleep) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + const void *resp; + + mtx_lock(&ctx->xc_lock); + + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + while (xact->x_resp == NULL && + (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) == 0) { + if (can_sleep) { + mtx_sleep(&ctx->xc_active, &ctx->xc_lock, 0, + "wxact", 0); + } else { + mtx_unlock(&ctx->xc_lock); + DELAY(1000); + mtx_lock(&ctx->xc_lock); + } + } + resp = vmbus_xact_return(xact, resp_len); + mtx_unlock(&ctx->xc_lock); return (resp); } const void * vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len) { return (vmbus_xact_wait1(xact, resp_len, true /* can sleep */)); } const void * vmbus_xact_busywait(struct vmbus_xact *xact, size_t *resp_len) { return (vmbus_xact_wait1(xact, resp_len, false /* can't sleep */)); +} + +const void * +vmbus_xact_poll(struct vmbus_xact *xact, size_t *resp_len) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + const void *resp; + + mtx_lock(&ctx->xc_lock); + + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + if (xact->x_resp == NULL && + (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) == 0) { + mtx_unlock(&ctx->xc_lock); + *resp_len = 0; + return (NULL); + } + resp = vmbus_xact_return(xact, resp_len); + + mtx_unlock(&ctx->xc_lock); + + return (resp); } static void vmbus_xact_save_resp(struct vmbus_xact *xact, const void *data, size_t dlen) { struct vmbus_xact_ctx *ctx = xact->x_ctx; size_t cplen = dlen; mtx_assert(&ctx->xc_lock, MA_OWNED); if (cplen > ctx->xc_resp_size) { printf("vmbus: xact response truncated %zu -> %zu\n", cplen, ctx->xc_resp_size); cplen = ctx->xc_resp_size; } KASSERT(ctx->xc_active == xact, ("xact mismatch")); memcpy(xact->x_resp0, data, cplen); xact->x_resp_len = cplen; xact->x_resp = xact->x_resp0; } void vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen) { struct vmbus_xact_ctx *ctx = xact->x_ctx; int do_wakeup = 0; mtx_lock(&ctx->xc_lock); /* * NOTE: * xc_active could be NULL, if the ctx has been orphaned. */ if (ctx->xc_active != NULL) { vmbus_xact_save_resp(xact, data, dlen); do_wakeup = 1; } else { KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY, ("no active xact pending")); printf("vmbus: drop xact response\n"); } mtx_unlock(&ctx->xc_lock); if (do_wakeup) wakeup(&ctx->xc_active); } void vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, const void *data, size_t dlen) { int do_wakeup = 0; mtx_lock(&ctx->xc_lock); /* * NOTE: * xc_active could be NULL, if the ctx has been orphaned. 
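vmbus_xact_poll() added above is the non-blocking sibling of vmbus_xact_wait(): it returns NULL (with *resp_len zeroed) when no reply has arrived yet, which is exactly what the REVOKE_LINGER loop in the channel-open path needs for bounded polling before canceling the hypercall. A toy of that bounded poll-then-cancel pattern:

#include <stdio.h>
#include <unistd.h>

static int replies;

/* Toy poll: non-blocking, NULL until a reply shows up. */
static const char *
xact_poll(void)
{
	return (++replies >= 50 ? "response" : NULL);
}

int
main(void)
{
	const char *msg = NULL;
	int i;

	/* Bounded poll, then cancel, like REVOKE_LINGER in the open path. */
	for (i = 0; i < 100; i++) {
		msg = xact_poll();
		if (msg != NULL)
			break;
		usleep(1000);		/* DELAY(1000) analogue */
	}
	if (msg == NULL)
		printf("no reply; would cancel the pending hypercall\n");
	else
		printf("got %s after %d poll(s)\n", msg, i + 1);
	return (0);
}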
*/ if (ctx->xc_active != NULL) { vmbus_xact_save_resp(ctx->xc_active, data, dlen); do_wakeup = 1; } else { KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY, ("no active xact pending")); printf("vmbus: drop xact response\n"); } mtx_unlock(&ctx->xc_lock); if (do_wakeup) wakeup(&ctx->xc_active); } Index: projects/clang391-import/sys/dev/usb/wlan/if_rsu.c =================================================================== --- projects/clang391-import/sys/dev/usb/wlan/if_rsu.c (revision 309262) +++ projects/clang391-import/sys/dev/usb/wlan/if_rsu.c (revision 309263) @@ -1,2980 +1,3053 @@ /* $OpenBSD: if_rsu.c,v 1.17 2013/04/15 09:23:01 mglocker Exp $ */ /*- * Copyright (c) 2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for Realtek RTL8188SU/RTL8191SU/RTL8192SU. * * TODO: * o h/w crypto * o hostap / ibss / mesh * o sensible RSSI levels * o power-save operation */ #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "usbdevs.h" #define USB_DEBUG_VAR rsu_debug #include #include #ifdef USB_DEBUG static int rsu_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, rsu, CTLFLAG_RW, 0, "USB rsu"); SYSCTL_INT(_hw_usb_rsu, OID_AUTO, debug, CTLFLAG_RWTUN, &rsu_debug, 0, "Debug level"); #define RSU_DPRINTF(_sc, _flg, ...) \ do \ if (((_flg) == (RSU_DEBUG_ANY)) || (rsu_debug & (_flg))) \ device_printf((_sc)->sc_dev, __VA_ARGS__); \ while (0) #else #define RSU_DPRINTF(_sc, _flg, ...) 
#endif static int rsu_enable_11n = 1; TUNABLE_INT("hw.usb.rsu.enable_11n", &rsu_enable_11n); #define RSU_DEBUG_ANY 0xffffffff #define RSU_DEBUG_TX 0x00000001 #define RSU_DEBUG_RX 0x00000002 #define RSU_DEBUG_RESET 0x00000004 #define RSU_DEBUG_CALIB 0x00000008 #define RSU_DEBUG_STATE 0x00000010 #define RSU_DEBUG_SCAN 0x00000020 #define RSU_DEBUG_FWCMD 0x00000040 #define RSU_DEBUG_TXDONE 0x00000080 #define RSU_DEBUG_FW 0x00000100 #define RSU_DEBUG_FWDBG 0x00000200 #define RSU_DEBUG_AMPDU 0x00000400 static const STRUCT_USB_HOST_ID rsu_devs[] = { #define RSU_HT_NOT_SUPPORTED 0 #define RSU_HT_SUPPORTED 1 #define RSU_DEV_HT(v,p) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, \ RSU_HT_SUPPORTED) } #define RSU_DEV(v,p) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, \ RSU_HT_NOT_SUPPORTED) } RSU_DEV(ASUS, RTL8192SU), RSU_DEV(AZUREWAVE, RTL8192SU_4), RSU_DEV_HT(ACCTON, RTL8192SU), RSU_DEV_HT(ASUS, USBN10), RSU_DEV_HT(AZUREWAVE, RTL8192SU_1), RSU_DEV_HT(AZUREWAVE, RTL8192SU_2), RSU_DEV_HT(AZUREWAVE, RTL8192SU_3), RSU_DEV_HT(AZUREWAVE, RTL8192SU_5), RSU_DEV_HT(BELKIN, RTL8192SU_1), RSU_DEV_HT(BELKIN, RTL8192SU_2), RSU_DEV_HT(BELKIN, RTL8192SU_3), RSU_DEV_HT(CONCEPTRONIC2, RTL8192SU_1), RSU_DEV_HT(CONCEPTRONIC2, RTL8192SU_2), RSU_DEV_HT(CONCEPTRONIC2, RTL8192SU_3), RSU_DEV_HT(COREGA, RTL8192SU), RSU_DEV_HT(DLINK2, DWA131A1), RSU_DEV_HT(DLINK2, RTL8192SU_1), RSU_DEV_HT(DLINK2, RTL8192SU_2), RSU_DEV_HT(EDIMAX, RTL8192SU_1), RSU_DEV_HT(EDIMAX, RTL8192SU_2), RSU_DEV_HT(EDIMAX, EW7622UMN), RSU_DEV_HT(GUILLEMOT, HWGUN54), RSU_DEV_HT(GUILLEMOT, HWNUM300), RSU_DEV_HT(HAWKING, RTL8192SU_1), RSU_DEV_HT(HAWKING, RTL8192SU_2), RSU_DEV_HT(PLANEX2, GWUSNANO), RSU_DEV_HT(REALTEK, RTL8171), RSU_DEV_HT(REALTEK, RTL8172), RSU_DEV_HT(REALTEK, RTL8173), RSU_DEV_HT(REALTEK, RTL8174), RSU_DEV_HT(REALTEK, RTL8192SU), RSU_DEV_HT(REALTEK, RTL8712), RSU_DEV_HT(REALTEK, RTL8713), RSU_DEV_HT(SENAO, RTL8192SU_1), RSU_DEV_HT(SENAO, RTL8192SU_2), RSU_DEV_HT(SITECOMEU, WL349V1), RSU_DEV_HT(SITECOMEU, WL353), RSU_DEV_HT(SWEEX2, LW154), RSU_DEV_HT(TRENDNET, TEW646UBH), #undef RSU_DEV_HT #undef RSU_DEV }; static device_probe_t rsu_match; static device_attach_t rsu_attach; static device_detach_t rsu_detach; static usb_callback_t rsu_bulk_tx_callback_be_bk; static usb_callback_t rsu_bulk_tx_callback_vi_vo; static usb_callback_t rsu_bulk_tx_callback_h2c; static usb_callback_t rsu_bulk_rx_callback; static usb_error_t rsu_do_request(struct rsu_softc *, struct usb_device_request *, void *); static struct ieee80211vap * rsu_vap_create(struct ieee80211com *, const char name[], int, enum ieee80211_opmode, int, const uint8_t bssid[], const uint8_t mac[]); static void rsu_vap_delete(struct ieee80211vap *); static void rsu_scan_start(struct ieee80211com *); static void rsu_scan_end(struct ieee80211com *); static void rsu_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel[]); static void rsu_set_channel(struct ieee80211com *); static void rsu_scan_curchan(struct ieee80211_scan_state *, unsigned long); static void rsu_scan_mindwell(struct ieee80211_scan_state *); +static uint8_t rsu_get_multi_pos(const uint8_t[]); +static void rsu_set_multi(struct rsu_softc *); static void rsu_update_mcast(struct ieee80211com *); static int rsu_alloc_rx_list(struct rsu_softc *); static void rsu_free_rx_list(struct rsu_softc *); static int rsu_alloc_tx_list(struct rsu_softc *); static void rsu_free_tx_list(struct rsu_softc *); static void rsu_free_list(struct rsu_softc *, struct rsu_data [], int); static struct rsu_data *_rsu_getbuf(struct 
rsu_softc *); static struct rsu_data *rsu_getbuf(struct rsu_softc *); static void rsu_freebuf(struct rsu_softc *, struct rsu_data *); static int rsu_write_region_1(struct rsu_softc *, uint16_t, uint8_t *, int); static void rsu_write_1(struct rsu_softc *, uint16_t, uint8_t); static void rsu_write_2(struct rsu_softc *, uint16_t, uint16_t); static void rsu_write_4(struct rsu_softc *, uint16_t, uint32_t); static int rsu_read_region_1(struct rsu_softc *, uint16_t, uint8_t *, int); static uint8_t rsu_read_1(struct rsu_softc *, uint16_t); static uint16_t rsu_read_2(struct rsu_softc *, uint16_t); static uint32_t rsu_read_4(struct rsu_softc *, uint16_t); static int rsu_fw_iocmd(struct rsu_softc *, uint32_t); static uint8_t rsu_efuse_read_1(struct rsu_softc *, uint16_t); static int rsu_read_rom(struct rsu_softc *); static int rsu_fw_cmd(struct rsu_softc *, uint8_t, void *, int); static void rsu_calib_task(void *, int); static void rsu_tx_task(void *, int); static int rsu_newstate(struct ieee80211vap *, enum ieee80211_state, int); #ifdef notyet static void rsu_set_key(struct rsu_softc *, const struct ieee80211_key *); static void rsu_delete_key(struct rsu_softc *, const struct ieee80211_key *); #endif static int rsu_site_survey(struct rsu_softc *, struct ieee80211_scan_ssid *); static int rsu_join_bss(struct rsu_softc *, struct ieee80211_node *); static int rsu_disconnect(struct rsu_softc *); static int rsu_hwrssi_to_rssi(struct rsu_softc *, int hw_rssi); static void rsu_event_survey(struct rsu_softc *, uint8_t *, int); static void rsu_event_join_bss(struct rsu_softc *, uint8_t *, int); static void rsu_rx_event(struct rsu_softc *, uint8_t, uint8_t *, int); static void rsu_rx_multi_event(struct rsu_softc *, uint8_t *, int); #if 0 static int8_t rsu_get_rssi(struct rsu_softc *, int, void *); #endif static struct mbuf * rsu_rx_frame(struct rsu_softc *, uint8_t *, int); static struct mbuf * rsu_rx_multi_frame(struct rsu_softc *, uint8_t *, int); static struct mbuf * rsu_rxeof(struct usb_xfer *, struct rsu_data *); static void rsu_txeof(struct usb_xfer *, struct rsu_data *); static int rsu_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void rsu_init(struct rsu_softc *); static int rsu_tx_start(struct rsu_softc *, struct ieee80211_node *, struct mbuf *, struct rsu_data *); static int rsu_transmit(struct ieee80211com *, struct mbuf *); static void rsu_start(struct rsu_softc *); static void _rsu_start(struct rsu_softc *); static void rsu_parent(struct ieee80211com *); static void rsu_stop(struct rsu_softc *); static void rsu_ms_delay(struct rsu_softc *, int); static device_method_t rsu_methods[] = { DEVMETHOD(device_probe, rsu_match), DEVMETHOD(device_attach, rsu_attach), DEVMETHOD(device_detach, rsu_detach), DEVMETHOD_END }; static driver_t rsu_driver = { .name = "rsu", .methods = rsu_methods, .size = sizeof(struct rsu_softc) }; static devclass_t rsu_devclass; DRIVER_MODULE(rsu, uhub, rsu_driver, rsu_devclass, NULL, 0); MODULE_DEPEND(rsu, wlan, 1, 1, 1); MODULE_DEPEND(rsu, usb, 1, 1, 1); MODULE_DEPEND(rsu, firmware, 1, 1, 1); MODULE_VERSION(rsu, 1); USB_PNP_HOST_INFO(rsu_devs); static const uint8_t rsu_chan_2ghz[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; static uint8_t rsu_wme_ac_xfer_map[4] = { [WME_AC_BE] = RSU_BULK_TX_BE_BK, [WME_AC_BK] = RSU_BULK_TX_BE_BK, [WME_AC_VI] = RSU_BULK_TX_VI_VO, [WME_AC_VO] = RSU_BULK_TX_VI_VO, }; /* XXX hard-coded */ #define RSU_H2C_ENDPOINT 3 static const struct usb_config rsu_config[RSU_N_TRANSFER] = { [RSU_BULK_RX] 
= { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_IN, .bufsize = RSU_RXBUFSZ, .flags = { .pipe_bof = 1, .short_xfer_ok = 1 }, .callback = rsu_bulk_rx_callback }, [RSU_BULK_TX_BE_BK] = { .type = UE_BULK, .endpoint = 0x06, .direction = UE_DIR_OUT, .bufsize = RSU_TXBUFSZ, .flags = { .ext_buffer = 1, .pipe_bof = 1, .force_short_xfer = 1 }, .callback = rsu_bulk_tx_callback_be_bk, .timeout = RSU_TX_TIMEOUT }, [RSU_BULK_TX_VI_VO] = { .type = UE_BULK, .endpoint = 0x04, .direction = UE_DIR_OUT, .bufsize = RSU_TXBUFSZ, .flags = { .ext_buffer = 1, .pipe_bof = 1, .force_short_xfer = 1 }, .callback = rsu_bulk_tx_callback_vi_vo, .timeout = RSU_TX_TIMEOUT }, [RSU_BULK_TX_H2C] = { .type = UE_BULK, .endpoint = 0x0d, .direction = UE_DIR_OUT, .bufsize = RSU_TXBUFSZ, .flags = { .ext_buffer = 1, .pipe_bof = 1, .short_xfer_ok = 1 }, .callback = rsu_bulk_tx_callback_h2c, .timeout = RSU_TX_TIMEOUT }, }; static int rsu_match(device_t self) { struct usb_attach_arg *uaa = device_get_ivars(self); if (uaa->usb_mode != USB_MODE_HOST || uaa->info.bIfaceIndex != 0 || uaa->info.bConfigIndex != 0) return (ENXIO); return (usbd_lookup_id_by_uaa(rsu_devs, sizeof(rsu_devs), uaa)); } static int rsu_send_mgmt(struct ieee80211_node *ni, int type, int arg) { return (ENOTSUP); } static void rsu_update_chw(struct ieee80211com *ic) { } /* * notification from net80211 that it'd like to do A-MPDU on the given TID. * * Note: this actually hangs traffic at the present moment, so don't use it. * The firmware debug does indiciate it's sending and establishing a TX AMPDU * session, but then no traffic flows. */ static int rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { #if 0 struct rsu_softc *sc = ni->ni_ic->ic_softc; struct r92s_add_ba_req req; /* Don't enable if it's requested or running */ if (IEEE80211_AMPDU_REQUESTED(tap)) return (0); if (IEEE80211_AMPDU_RUNNING(tap)) return (0); /* We've decided to send addba; so send it */ req.tid = htole32(tap->txa_tid); /* Attempt net80211 state */ if (ieee80211_ampdu_tx_request_ext(ni, tap->txa_tid) != 1) return (0); /* Send the firmware command */ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: establishing AMPDU TX for TID %d\n", __func__, tap->txa_tid); RSU_LOCK(sc); if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 1) { RSU_UNLOCK(sc); /* Mark failure */ (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0); return (0); } RSU_UNLOCK(sc); /* Mark success; we don't get any further notifications */ (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1); #endif /* Return 0, we're driving this ourselves */ return (0); } static int rsu_wme_update(struct ieee80211com *ic) { /* Firmware handles this; not our problem */ return (0); } static int rsu_attach(device_t self) { struct usb_attach_arg *uaa = device_get_ivars(self); struct rsu_softc *sc = device_get_softc(self); struct ieee80211com *ic = &sc->sc_ic; int error; uint8_t iface_index; struct usb_interface *iface; const char *rft; device_set_usb_desc(self); sc->sc_udev = uaa->device; sc->sc_dev = self; if (rsu_enable_11n) sc->sc_ht = !! 
(USB_GET_DRIVER_INFO(uaa) & RSU_HT_SUPPORTED); /* Get number of endpoints */ iface = usbd_get_iface(sc->sc_udev, 0); sc->sc_nendpoints = iface->idesc->bNumEndpoints; /* Endpoints are hard-coded for now, so enforce 4-endpoint only */ if (sc->sc_nendpoints != 4) { device_printf(sc->sc_dev, "the driver currently only supports 4-endpoint devices\n"); return (ENXIO); } mtx_init(&sc->sc_mtx, device_get_nameunit(self), MTX_NETWORK_LOCK, MTX_DEF); TIMEOUT_TASK_INIT(taskqueue_thread, &sc->calib_task, 0, rsu_calib_task, sc); TASK_INIT(&sc->tx_task, 0, rsu_tx_task, sc); mbufq_init(&sc->sc_snd, ifqmaxlen); /* Allocate Tx/Rx buffers. */ error = rsu_alloc_rx_list(sc); if (error != 0) { device_printf(sc->sc_dev, "could not allocate Rx buffers\n"); goto fail_usb; } error = rsu_alloc_tx_list(sc); if (error != 0) { device_printf(sc->sc_dev, "could not allocate Tx buffers\n"); rsu_free_rx_list(sc); goto fail_usb; } iface_index = 0; error = usbd_transfer_setup(uaa->device, &iface_index, sc->sc_xfer, rsu_config, RSU_N_TRANSFER, sc, &sc->sc_mtx); if (error) { device_printf(sc->sc_dev, "could not allocate USB transfers, err=%s\n", usbd_errstr(error)); goto fail_usb; } RSU_LOCK(sc); /* Read chip revision. */ sc->cut = MS(rsu_read_4(sc, R92S_PMC_FSM), R92S_PMC_FSM_CUT); if (sc->cut != 3) sc->cut = (sc->cut >> 1) + 1; error = rsu_read_rom(sc); RSU_UNLOCK(sc); if (error != 0) { device_printf(self, "could not read ROM\n"); goto fail_rom; } /* Figure out TX/RX streams */ switch (sc->rom[84]) { case 0x0: sc->sc_rftype = RTL8712_RFCONFIG_1T1R; sc->sc_nrxstream = 1; sc->sc_ntxstream = 1; rft = "1T1R"; break; case 0x1: sc->sc_rftype = RTL8712_RFCONFIG_1T2R; sc->sc_nrxstream = 2; sc->sc_ntxstream = 1; rft = "1T2R"; break; case 0x2: sc->sc_rftype = RTL8712_RFCONFIG_2T2R; sc->sc_nrxstream = 2; sc->sc_ntxstream = 2; rft = "2T2R"; break; default: device_printf(sc->sc_dev, "%s: unknown board type (rfconfig=0x%02x)\n", __func__, sc->rom[84]); goto fail_rom; } IEEE80211_ADDR_COPY(ic->ic_macaddr, &sc->rom[0x12]); device_printf(self, "MAC/BB RTL8712 cut %d %s\n", sc->cut, rft); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(self); ic->ic_phytype = IEEE80211_T_OFDM; /* Not only, but not used. */ ic->ic_opmode = IEEE80211_M_STA; /* Default to BSS mode. */ /* Set device capabilities. */ ic->ic_caps = IEEE80211_C_STA | /* station mode */ #if 0 IEEE80211_C_BGSCAN | /* Background scan. */ #endif IEEE80211_C_SHPREAMBLE | /* Short preamble supported. */ IEEE80211_C_WME | /* WME/QoS */ IEEE80211_C_SHSLOT | /* Short slot time supported. */ IEEE80211_C_WPA; /* WPA/RSN. */ /* Check if HT support is present. 
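* (sc_ht was latched just above from the USB ID table's RSU_HT_SUPPORTED
* flag, and only when rsu_enable_11n is set.)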
*/ if (sc->sc_ht) { device_printf(sc->sc_dev, "%s: enabling 11n\n", __func__); /* Enable basic HT */ ic->ic_htcaps = IEEE80211_HTC_HT | #if 0 IEEE80211_HTC_AMPDU | #endif IEEE80211_HTC_AMSDU | IEEE80211_HTCAP_MAXAMSDU_3839 | IEEE80211_HTCAP_SMPS_OFF; ic->ic_htcaps |= IEEE80211_HTCAP_CHWIDTH40; /* set number of spatial streams */ ic->ic_txstream = sc->sc_ntxstream; ic->ic_rxstream = sc->sc_nrxstream; } ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD; rsu_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); ieee80211_ifattach(ic); ic->ic_raw_xmit = rsu_raw_xmit; ic->ic_scan_start = rsu_scan_start; ic->ic_scan_end = rsu_scan_end; ic->ic_getradiocaps = rsu_getradiocaps; ic->ic_set_channel = rsu_set_channel; ic->ic_scan_curchan = rsu_scan_curchan; ic->ic_scan_mindwell = rsu_scan_mindwell; ic->ic_vap_create = rsu_vap_create; ic->ic_vap_delete = rsu_vap_delete; ic->ic_update_mcast = rsu_update_mcast; ic->ic_parent = rsu_parent; ic->ic_transmit = rsu_transmit; ic->ic_send_mgmt = rsu_send_mgmt; ic->ic_update_chw = rsu_update_chw; ic->ic_ampdu_enable = rsu_ampdu_enable; ic->ic_wme.wme_update = rsu_wme_update; ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), RSU_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), RSU_RX_RADIOTAP_PRESENT); if (bootverbose) ieee80211_announce(ic); return (0); fail_rom: usbd_transfer_unsetup(sc->sc_xfer, RSU_N_TRANSFER); fail_usb: mtx_destroy(&sc->sc_mtx); return (ENXIO); } static int rsu_detach(device_t self) { struct rsu_softc *sc = device_get_softc(self); struct ieee80211com *ic = &sc->sc_ic; RSU_LOCK(sc); rsu_stop(sc); RSU_UNLOCK(sc); usbd_transfer_unsetup(sc->sc_xfer, RSU_N_TRANSFER); /* * Free buffers /before/ we detach from net80211, else node * references to destroyed vaps will lead to a panic. */ /* Free Tx/Rx buffers. 
*/ RSU_LOCK(sc); rsu_free_tx_list(sc); rsu_free_rx_list(sc); RSU_UNLOCK(sc); /* Frames are freed; detach from net80211 */ ieee80211_ifdetach(ic); taskqueue_drain_timeout(taskqueue_thread, &sc->calib_task); taskqueue_drain(taskqueue_thread, &sc->tx_task); mtx_destroy(&sc->sc_mtx); return (0); } static usb_error_t rsu_do_request(struct rsu_softc *sc, struct usb_device_request *req, void *data) { usb_error_t err; int ntries = 10; RSU_ASSERT_LOCKED(sc); while (ntries--) { err = usbd_do_request_flags(sc->sc_udev, &sc->sc_mtx, req, data, 0, NULL, 250 /* ms */); if (err == 0 || err == USB_ERR_NOT_CONFIGURED) break; DPRINTFN(1, "Control request failed, %s (retrying)\n", usbd_errstr(err)); rsu_ms_delay(sc, 10); } return (err); } static struct ieee80211vap * rsu_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct rsu_vap *uvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return (NULL); uvp = malloc(sizeof(struct rsu_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &uvp->vap; if (ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid) != 0) { /* out of memory */ free(uvp, M_80211_VAP); return (NULL); } /* override state transition machine */ uvp->newstate = vap->iv_newstate; vap->iv_newstate = rsu_newstate; /* Limits from the r92su driver */ vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_16; vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_32K; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return (vap); } static void rsu_vap_delete(struct ieee80211vap *vap) { struct rsu_vap *uvp = RSU_VAP(vap); ieee80211_vap_detach(vap); free(uvp, M_80211_VAP); } static void rsu_scan_start(struct ieee80211com *ic) { struct rsu_softc *sc = ic->ic_softc; struct ieee80211_scan_state *ss = ic->ic_scan; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); int error; /* Scanning is done by the firmware. */ RSU_LOCK(sc); sc->sc_active_scan = !!(ss->ss_flags & IEEE80211_SCAN_ACTIVE); /* XXX TODO: force awake if in network-sleep? */ error = rsu_site_survey(sc, ss->ss_nssid > 0 ? &ss->ss_ssid[0] : NULL); RSU_UNLOCK(sc); if (error != 0) { device_printf(sc->sc_dev, "could not send site survey command\n"); ieee80211_cancel_scan(vap); } } static void rsu_scan_end(struct ieee80211com *ic) { /* Nothing to do here. */ } static void rsu_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct rsu_softc *sc = ic->ic_softc; uint8_t bands[IEEE80211_MODE_BYTES]; /* Set supported .11b and .11g rates. */ memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); if (sc->sc_ht) setbit(bands, IEEE80211_MODE_11NG); ieee80211_add_channel_list_2ghz(chans, maxchans, nchans, rsu_chan_2ghz, nitems(rsu_chan_2ghz), bands, 0); } static void rsu_set_channel(struct ieee80211com *ic __unused) { /* We are unable to switch channels yet. */ } static void rsu_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { /* Scan is done in rsu_scan_start(). */ } /** * Called by the net80211 framework to indicate that * the minimum dwell time has been met and the scan should be terminated. * We don't actually terminate the scan as the firmware will notify * us when it's finished and we have no way to interrupt it.
*/ static void rsu_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } +/* + * The same as rtwn_get_multi_pos() / rtwn_set_multi(). + */ +static uint8_t +rsu_get_multi_pos(const uint8_t maddr[]) +{ + uint64_t mask = 0x00004d101df481b4; + uint8_t pos = 0x27; /* initial value */ + int i, j; + + for (i = 0; i < IEEE80211_ADDR_LEN; i++) + for (j = (i == 0) ? 1 : 0; j < 8; j++) + if ((maddr[i] >> j) & 1) + pos ^= (mask >> (i * 8 + j - 1)); + + pos &= 0x3f; + + return (pos); +} + static void +rsu_set_multi(struct rsu_softc *sc) +{ + struct ieee80211com *ic = &sc->sc_ic; + uint32_t mfilt[2]; + + RSU_ASSERT_LOCKED(sc); + + /* general structure was copied from ath(4). */ + if (ic->ic_allmulti == 0) { + struct ieee80211vap *vap; + struct ifnet *ifp; + struct ifmultiaddr *ifma; + + /* + * Merge multicast addresses to form the hardware filter. + */ + mfilt[0] = mfilt[1] = 0; + TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { + ifp = vap->iv_ifp; + if_maddr_rlock(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + caddr_t dl; + uint8_t pos; + + dl = LLADDR((struct sockaddr_dl *) + ifma->ifma_addr); + pos = rsu_get_multi_pos(dl); + + mfilt[pos / 32] |= (1 << (pos % 32)); + } + if_maddr_runlock(ifp); + } + } else + mfilt[0] = mfilt[1] = ~0; + + rsu_write_4(sc, R92S_MAR + 0, mfilt[0]); + rsu_write_4(sc, R92S_MAR + 4, mfilt[1]); + + RSU_DPRINTF(sc, RSU_DEBUG_STATE, "%s: MC filter %08x:%08x\n", + __func__, mfilt[0], mfilt[1]); +} + +static void rsu_update_mcast(struct ieee80211com *ic) { - /* XXX do nothing? */ + struct rsu_softc *sc = ic->ic_softc; + + RSU_LOCK(sc); + if (sc->sc_running) + rsu_set_multi(sc); + RSU_UNLOCK(sc); } static int rsu_alloc_list(struct rsu_softc *sc, struct rsu_data data[], int ndata, int maxsz) { int i, error; for (i = 0; i < ndata; i++) { struct rsu_data *dp = &data[i]; dp->sc = sc; dp->m = NULL; dp->buf = malloc(maxsz, M_USBDEV, M_NOWAIT); if (dp->buf == NULL) { device_printf(sc->sc_dev, "could not allocate buffer\n"); error = ENOMEM; goto fail; } dp->ni = NULL; } return (0); fail: rsu_free_list(sc, data, ndata); return (error); } static int rsu_alloc_rx_list(struct rsu_softc *sc) { int error, i; error = rsu_alloc_list(sc, sc->sc_rx, RSU_RX_LIST_COUNT, RSU_RXBUFSZ); if (error != 0) return (error); STAILQ_INIT(&sc->sc_rx_active); STAILQ_INIT(&sc->sc_rx_inactive); for (i = 0; i < RSU_RX_LIST_COUNT; i++) STAILQ_INSERT_HEAD(&sc->sc_rx_inactive, &sc->sc_rx[i], next); return (0); } static int rsu_alloc_tx_list(struct rsu_softc *sc) { int error, i; error = rsu_alloc_list(sc, sc->sc_tx, RSU_TX_LIST_COUNT, RSU_TXBUFSZ); if (error != 0) return (error); STAILQ_INIT(&sc->sc_tx_inactive); for (i = 0; i != RSU_N_TRANSFER; i++) { STAILQ_INIT(&sc->sc_tx_active[i]); STAILQ_INIT(&sc->sc_tx_pending[i]); } for (i = 0; i < RSU_TX_LIST_COUNT; i++) { STAILQ_INSERT_HEAD(&sc->sc_tx_inactive, &sc->sc_tx[i], next); } return (0); } static void rsu_free_tx_list(struct rsu_softc *sc) { int i; /* prevent further allocations from TX list(s) */ STAILQ_INIT(&sc->sc_tx_inactive); for (i = 0; i != RSU_N_TRANSFER; i++) { STAILQ_INIT(&sc->sc_tx_active[i]); STAILQ_INIT(&sc->sc_tx_pending[i]); } rsu_free_list(sc, sc->sc_tx, RSU_TX_LIST_COUNT); } static void rsu_free_rx_list(struct rsu_softc *sc) { /* prevent further allocations from RX list(s) */ STAILQ_INIT(&sc->sc_rx_inactive); STAILQ_INIT(&sc->sc_rx_active); rsu_free_list(sc, sc->sc_rx, RSU_RX_LIST_COUNT); } static void rsu_free_list(struct rsu_softc *sc, struct rsu_data data[], int ndata) { int 
i; for (i = 0; i < ndata; i++) { struct rsu_data *dp = &data[i]; if (dp->buf != NULL) { free(dp->buf, M_USBDEV); dp->buf = NULL; } if (dp->ni != NULL) { ieee80211_free_node(dp->ni); dp->ni = NULL; } } } static struct rsu_data * _rsu_getbuf(struct rsu_softc *sc) { struct rsu_data *bf; bf = STAILQ_FIRST(&sc->sc_tx_inactive); if (bf != NULL) STAILQ_REMOVE_HEAD(&sc->sc_tx_inactive, next); else bf = NULL; return (bf); } static struct rsu_data * rsu_getbuf(struct rsu_softc *sc) { struct rsu_data *bf; RSU_ASSERT_LOCKED(sc); bf = _rsu_getbuf(sc); if (bf == NULL) { RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: no buffers\n", __func__); } return (bf); } static void rsu_freebuf(struct rsu_softc *sc, struct rsu_data *bf) { RSU_ASSERT_LOCKED(sc); STAILQ_INSERT_TAIL(&sc->sc_tx_inactive, bf, next); } static int rsu_write_region_1(struct rsu_softc *sc, uint16_t addr, uint8_t *buf, int len) { usb_device_request_t req; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = R92S_REQ_REGS; USETW(req.wValue, addr); USETW(req.wIndex, 0); USETW(req.wLength, len); return (rsu_do_request(sc, &req, buf)); } static void rsu_write_1(struct rsu_softc *sc, uint16_t addr, uint8_t val) { rsu_write_region_1(sc, addr, &val, 1); } static void rsu_write_2(struct rsu_softc *sc, uint16_t addr, uint16_t val) { val = htole16(val); rsu_write_region_1(sc, addr, (uint8_t *)&val, 2); } static void rsu_write_4(struct rsu_softc *sc, uint16_t addr, uint32_t val) { val = htole32(val); rsu_write_region_1(sc, addr, (uint8_t *)&val, 4); } static int rsu_read_region_1(struct rsu_softc *sc, uint16_t addr, uint8_t *buf, int len) { usb_device_request_t req; req.bmRequestType = UT_READ_VENDOR_DEVICE; req.bRequest = R92S_REQ_REGS; USETW(req.wValue, addr); USETW(req.wIndex, 0); USETW(req.wLength, len); return (rsu_do_request(sc, &req, buf)); } static uint8_t rsu_read_1(struct rsu_softc *sc, uint16_t addr) { uint8_t val; if (rsu_read_region_1(sc, addr, &val, 1) != 0) return (0xff); return (val); } static uint16_t rsu_read_2(struct rsu_softc *sc, uint16_t addr) { uint16_t val; if (rsu_read_region_1(sc, addr, (uint8_t *)&val, 2) != 0) return (0xffff); return (le16toh(val)); } static uint32_t rsu_read_4(struct rsu_softc *sc, uint16_t addr) { uint32_t val; if (rsu_read_region_1(sc, addr, (uint8_t *)&val, 4) != 0) return (0xffffffff); return (le32toh(val)); } static int rsu_fw_iocmd(struct rsu_softc *sc, uint32_t iocmd) { int ntries; rsu_write_4(sc, R92S_IOCMD_CTRL, iocmd); rsu_ms_delay(sc, 1); for (ntries = 0; ntries < 50; ntries++) { if (rsu_read_4(sc, R92S_IOCMD_CTRL) == 0) return (0); rsu_ms_delay(sc, 1); } return (ETIMEDOUT); } static uint8_t rsu_efuse_read_1(struct rsu_softc *sc, uint16_t addr) { uint32_t reg; int ntries; reg = rsu_read_4(sc, R92S_EFUSE_CTRL); reg = RW(reg, R92S_EFUSE_CTRL_ADDR, addr); reg &= ~R92S_EFUSE_CTRL_VALID; rsu_write_4(sc, R92S_EFUSE_CTRL, reg); /* Wait for read operation to complete. */ for (ntries = 0; ntries < 100; ntries++) { reg = rsu_read_4(sc, R92S_EFUSE_CTRL); if (reg & R92S_EFUSE_CTRL_VALID) return (MS(reg, R92S_EFUSE_CTRL_DATA)); rsu_ms_delay(sc, 1); } device_printf(sc->sc_dev, "could not read efuse byte at address 0x%x\n", addr); return (0xff); } static int rsu_read_rom(struct rsu_softc *sc) { uint8_t *rom = sc->rom; uint16_t addr = 0; uint32_t reg; uint8_t off, msk; int i; /* Make sure that ROM type is eFuse and that autoload succeeded. */ reg = rsu_read_1(sc, R92S_EE_9346CR); if ((reg & (R92S_9356SEL | R92S_EEPROM_EN)) != R92S_EEPROM_EN) return (EIO); /* Turn on 2.5V to prevent eFuse leakage. 
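* Bit 0x80 of R92S_EFUSE_TEST + 3 is pulsed high for 1ms and dropped
* again before the ROM is read out below.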
*/ reg = rsu_read_1(sc, R92S_EFUSE_TEST + 3); rsu_write_1(sc, R92S_EFUSE_TEST + 3, reg | 0x80); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_EFUSE_TEST + 3, reg & ~0x80); /* Read full ROM image. */ memset(&sc->rom, 0xff, sizeof(sc->rom)); while (addr < 512) { reg = rsu_efuse_read_1(sc, addr); if (reg == 0xff) break; addr++; off = reg >> 4; msk = reg & 0xf; for (i = 0; i < 4; i++) { if (msk & (1 << i)) continue; rom[off * 8 + i * 2 + 0] = rsu_efuse_read_1(sc, addr); addr++; rom[off * 8 + i * 2 + 1] = rsu_efuse_read_1(sc, addr); addr++; } } #ifdef USB_DEBUG if (rsu_debug >= 5) { /* Dump ROM content. */ printf("\n"); for (i = 0; i < sizeof(sc->rom); i++) printf("%02x:", rom[i]); printf("\n"); } #endif return (0); } static int rsu_fw_cmd(struct rsu_softc *sc, uint8_t code, void *buf, int len) { const uint8_t which = RSU_H2C_ENDPOINT; struct rsu_data *data; struct r92s_tx_desc *txd; struct r92s_fw_cmd_hdr *cmd; int cmdsz; int xferlen; RSU_ASSERT_LOCKED(sc); data = rsu_getbuf(sc); if (data == NULL) return (ENOMEM); /* Blank the entire payload, just to be safe */ memset(data->buf, '\0', RSU_TXBUFSZ); /* Round-up command length to a multiple of 8 bytes. */ /* XXX TODO: is this required? */ cmdsz = (len + 7) & ~7; xferlen = sizeof(*txd) + sizeof(*cmd) + cmdsz; KASSERT(xferlen <= RSU_TXBUFSZ, ("%s: invalid length", __func__)); memset(data->buf, 0, xferlen); /* Setup Tx descriptor. */ txd = (struct r92s_tx_desc *)data->buf; txd->txdw0 = htole32( SM(R92S_TXDW0_OFFSET, sizeof(*txd)) | SM(R92S_TXDW0_PKTLEN, sizeof(*cmd) + cmdsz) | R92S_TXDW0_OWN | R92S_TXDW0_FSG | R92S_TXDW0_LSG); txd->txdw1 = htole32(SM(R92S_TXDW1_QSEL, R92S_TXDW1_QSEL_H2C)); /* Setup command header. */ cmd = (struct r92s_fw_cmd_hdr *)&txd[1]; cmd->len = htole16(cmdsz); cmd->code = code; cmd->seq = sc->cmd_seq; sc->cmd_seq = (sc->cmd_seq + 1) & 0x7f; /* Copy command payload. */ memcpy(&cmd[1], buf, len); RSU_DPRINTF(sc, RSU_DEBUG_TX | RSU_DEBUG_FWCMD, "%s: Tx cmd code=0x%x len=0x%x\n", __func__, code, cmdsz); data->buflen = xferlen; STAILQ_INSERT_TAIL(&sc->sc_tx_pending[which], data, next); usbd_transfer_start(sc->sc_xfer[which]); return (0); } /* ARGSUSED */ static void rsu_calib_task(void *arg, int pending __unused) { struct rsu_softc *sc = arg; #ifdef notyet uint32_t reg; #endif RSU_DPRINTF(sc, RSU_DEBUG_CALIB, "%s: running calibration task\n", __func__); RSU_LOCK(sc); #ifdef notyet /* Read WPS PBC status. */ rsu_write_1(sc, R92S_MAC_PINMUX_CTRL, R92S_GPIOMUX_EN | SM(R92S_GPIOSEL_GPIO, R92S_GPIOSEL_GPIO_JTAG)); rsu_write_1(sc, R92S_GPIO_IO_SEL, rsu_read_1(sc, R92S_GPIO_IO_SEL) & ~R92S_GPIO_WPS); reg = rsu_read_1(sc, R92S_GPIO_CTRL); if (reg != 0xff && (reg & R92S_GPIO_WPS)) DPRINTF(("WPS PBC is pushed\n")); #endif /* Read current signal level. */ if (rsu_fw_iocmd(sc, 0xf4000001) == 0) { sc->sc_currssi = rsu_read_4(sc, R92S_IOCMD_DATA); RSU_DPRINTF(sc, RSU_DEBUG_CALIB, "%s: RSSI=%d (%d)\n", __func__, sc->sc_currssi, rsu_hwrssi_to_rssi(sc, sc->sc_currssi)); } if (sc->sc_calibrating) taskqueue_enqueue_timeout(taskqueue_thread, &sc->calib_task, hz); RSU_UNLOCK(sc); } static void rsu_tx_task(void *arg, int pending __unused) { struct rsu_softc *sc = arg; RSU_LOCK(sc); _rsu_start(sc); RSU_UNLOCK(sc); } #define RSU_PWR_UNKNOWN 0x0 #define RSU_PWR_ACTIVE 0x1 #define RSU_PWR_OFF 0x2 #define RSU_PWR_SLEEP 0x3 /* * Set the current power state. * * The rtlwifi code doesn't do this so aggressively; it * waits for an idle period after association with * no traffic before doing this. 
* * For now - it's on in all states except RUN, and * in RUN it'll transition to allow sleep. */ struct r92s_pwr_cmd { uint8_t mode; uint8_t smart_ps; uint8_t bcn_pass_time; }; static int rsu_set_fw_power_state(struct rsu_softc *sc, int state) { struct r92s_set_pwr_mode cmd; //struct r92s_pwr_cmd cmd; int error; RSU_ASSERT_LOCKED(sc); /* only change state if required */ if (sc->sc_curpwrstate == state) return (0); memset(&cmd, 0, sizeof(cmd)); switch (state) { case RSU_PWR_ACTIVE: /* Force the hardware awake */ rsu_write_1(sc, R92S_USB_HRPWM, R92S_USB_HRPWM_PS_ST_ACTIVE | R92S_USB_HRPWM_PS_ALL_ON); cmd.mode = R92S_PS_MODE_ACTIVE; break; case RSU_PWR_SLEEP: cmd.mode = R92S_PS_MODE_DTIM; /* XXX configurable? */ cmd.smart_ps = 1; /* XXX 2 if doing p2p */ cmd.bcn_pass_time = 5; /* in 100mS usb.c, linux/rtlwifi */ break; case RSU_PWR_OFF: cmd.mode = R92S_PS_MODE_RADIOOFF; break; default: device_printf(sc->sc_dev, "%s: unknown ps mode (%d)\n", __func__, state); return (ENXIO); } RSU_DPRINTF(sc, RSU_DEBUG_RESET, "%s: setting ps mode to %d (mode %d)\n", __func__, state, cmd.mode); error = rsu_fw_cmd(sc, R92S_CMD_SET_PWR_MODE, &cmd, sizeof(cmd)); if (error == 0) sc->sc_curpwrstate = state; return (error); } static int rsu_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct rsu_vap *uvp = RSU_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct rsu_softc *sc = ic->ic_softc; struct ieee80211_node *ni; struct ieee80211_rateset *rs; enum ieee80211_state ostate; int error, startcal = 0; ostate = vap->iv_state; RSU_DPRINTF(sc, RSU_DEBUG_STATE, "%s: %s -> %s\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); IEEE80211_UNLOCK(ic); if (ostate == IEEE80211_S_RUN) { RSU_LOCK(sc); /* Stop calibration. */ sc->sc_calibrating = 0; RSU_UNLOCK(sc); taskqueue_drain_timeout(taskqueue_thread, &sc->calib_task); taskqueue_drain(taskqueue_thread, &sc->tx_task); /* Disassociate from our current BSS. */ RSU_LOCK(sc); rsu_disconnect(sc); } else RSU_LOCK(sc); switch (nstate) { case IEEE80211_S_INIT: (void) rsu_set_fw_power_state(sc, RSU_PWR_ACTIVE); break; case IEEE80211_S_AUTH: ni = ieee80211_ref_node(vap->iv_bss); (void) rsu_set_fw_power_state(sc, RSU_PWR_ACTIVE); error = rsu_join_bss(sc, ni); ieee80211_free_node(ni); if (error != 0) { device_printf(sc->sc_dev, "could not send join command\n"); } break; case IEEE80211_S_RUN: ni = ieee80211_ref_node(vap->iv_bss); rs = &ni->ni_rates; /* Indicate highest supported rate. */ ni->ni_txrate = rs->rs_rates[rs->rs_nrates - 1]; (void) rsu_set_fw_power_state(sc, RSU_PWR_SLEEP); ieee80211_free_node(ni); startcal = 1; break; default: break; } if (startcal != 0) { sc->sc_calibrating = 1; /* Start periodic calibration. */ taskqueue_enqueue_timeout(taskqueue_thread, &sc->calib_task, hz); } RSU_UNLOCK(sc); IEEE80211_LOCK(ic); return (uvp->newstate(vap, nstate, arg)); } #ifdef notyet static void rsu_set_key(struct rsu_softc *sc, const struct ieee80211_key *k) { struct r92s_fw_cmd_set_key key; memset(&key, 0, sizeof(key)); /* Map net80211 cipher to HW crypto algorithm. 
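* WEP40 vs. WEP104 is chosen by key length; TKIP and AES-CCM map one to
* one. Unknown ciphers simply return, leaving no firmware key installed.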
*/ switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_WEP: if (k->wk_keylen < 8) key.algo = R92S_KEY_ALGO_WEP40; else key.algo = R92S_KEY_ALGO_WEP104; break; case IEEE80211_CIPHER_TKIP: key.algo = R92S_KEY_ALGO_TKIP; break; case IEEE80211_CIPHER_AES_CCM: key.algo = R92S_KEY_ALGO_AES; break; default: return; } key.id = k->wk_keyix; key.grpkey = (k->wk_flags & IEEE80211_KEY_GROUP) != 0; memcpy(key.key, k->wk_key, MIN(k->wk_keylen, sizeof(key.key))); (void)rsu_fw_cmd(sc, R92S_CMD_SET_KEY, &key, sizeof(key)); } static void rsu_delete_key(struct rsu_softc *sc, const struct ieee80211_key *k) { struct r92s_fw_cmd_set_key key; memset(&key, 0, sizeof(key)); key.id = k->wk_keyix; (void)rsu_fw_cmd(sc, R92S_CMD_SET_KEY, &key, sizeof(key)); } #endif static int rsu_site_survey(struct rsu_softc *sc, struct ieee80211_scan_ssid *ssid) { struct r92s_fw_cmd_sitesurvey cmd; RSU_ASSERT_LOCKED(sc); memset(&cmd, 0, sizeof(cmd)); /* TODO: passive channels? */ if (sc->sc_active_scan) cmd.active = htole32(1); cmd.limit = htole32(48); if (ssid != NULL) { sc->sc_extra_scan = 1; cmd.ssidlen = htole32(ssid->len); memcpy(cmd.ssid, ssid->ssid, ssid->len); } #ifdef USB_DEBUG if (rsu_debug & (RSU_DEBUG_SCAN | RSU_DEBUG_FWCMD)) { device_printf(sc->sc_dev, "sending site survey command, active %d", le32toh(cmd.active)); if (ssid != NULL) { printf(", ssid: "); ieee80211_print_essid(cmd.ssid, le32toh(cmd.ssidlen)); } printf("\n"); } #endif return (rsu_fw_cmd(sc, R92S_CMD_SITE_SURVEY, &cmd, sizeof(cmd))); } static int rsu_join_bss(struct rsu_softc *sc, struct ieee80211_node *ni) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; struct ndis_wlan_bssid_ex *bss; struct ndis_802_11_fixed_ies *fixed; struct r92s_fw_cmd_auth auth; uint8_t buf[sizeof(*bss) + 128] __aligned(4); uint8_t *frm; uint8_t opmode; int error; RSU_ASSERT_LOCKED(sc); /* Let the FW decide the opmode based on the capinfo field. */ opmode = NDIS802_11AUTOUNKNOWN; RSU_DPRINTF(sc, RSU_DEBUG_RESET, "%s: setting operating mode to %d\n", __func__, opmode); error = rsu_fw_cmd(sc, R92S_CMD_SET_OPMODE, &opmode, sizeof(opmode)); if (error != 0) return (error); memset(&auth, 0, sizeof(auth)); if (vap->iv_flags & IEEE80211_F_WPA) { auth.mode = R92S_AUTHMODE_WPA; auth.dot1x = (ni->ni_authmode == IEEE80211_AUTH_8021X); } else auth.mode = R92S_AUTHMODE_OPEN; RSU_DPRINTF(sc, RSU_DEBUG_RESET, "%s: setting auth mode to %d\n", __func__, auth.mode); error = rsu_fw_cmd(sc, R92S_CMD_SET_AUTH, &auth, sizeof(auth)); if (error != 0) return (error); memset(buf, 0, sizeof(buf)); bss = (struct ndis_wlan_bssid_ex *)buf; IEEE80211_ADDR_COPY(bss->macaddr, ni->ni_bssid); bss->ssid.ssidlen = htole32(ni->ni_esslen); memcpy(bss->ssid.ssid, ni->ni_essid, ni->ni_esslen); if (vap->iv_flags & (IEEE80211_F_PRIVACY | IEEE80211_F_WPA)) bss->privacy = htole32(1); bss->rssi = htole32(ni->ni_avgrssi); if (ic->ic_curmode == IEEE80211_MODE_11B) bss->networktype = htole32(NDIS802_11DS); else bss->networktype = htole32(NDIS802_11OFDM24); bss->config.len = htole32(sizeof(bss->config)); bss->config.bintval = htole32(ni->ni_intval); bss->config.dsconfig = htole32(ieee80211_chan2ieee(ic, ni->ni_chan)); bss->inframode = htole32(NDIS802_11INFRASTRUCTURE); /* XXX verify how this is supposed to look! */ memcpy(bss->supprates, ni->ni_rates.rs_rates, ni->ni_rates.rs_nrates); /* Write the fixed fields of the beacon frame. 
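* (timestamp, beacon interval and capability info), followed below by
* the IEs the firmware should include in its association request.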
*/ fixed = (struct ndis_802_11_fixed_ies *)&bss[1]; memcpy(&fixed->tstamp, ni->ni_tstamp.data, 8); fixed->bintval = htole16(ni->ni_intval); fixed->capabilities = htole16(ni->ni_capinfo); /* Write IEs to be included in the association request. */ frm = (uint8_t *)&fixed[1]; frm = ieee80211_add_rsn(frm, vap); frm = ieee80211_add_wpa(frm, vap); frm = ieee80211_add_qos(frm, ni); if ((ic->ic_flags & IEEE80211_F_WME) && (ni->ni_ies.wme_ie != NULL)) frm = ieee80211_add_wme_info(frm, &ic->ic_wme); if (ni->ni_flags & IEEE80211_NODE_HT) { frm = ieee80211_add_htcap(frm, ni); frm = ieee80211_add_htinfo(frm, ni); } bss->ieslen = htole32(frm - (uint8_t *)fixed); bss->len = htole32(((frm - buf) + 3) & ~3); RSU_DPRINTF(sc, RSU_DEBUG_RESET | RSU_DEBUG_FWCMD, "%s: sending join bss command to %s chan %d\n", __func__, ether_sprintf(bss->macaddr), le32toh(bss->config.dsconfig)); return (rsu_fw_cmd(sc, R92S_CMD_JOIN_BSS, buf, sizeof(buf))); } static int rsu_disconnect(struct rsu_softc *sc) { uint32_t zero = 0; /* :-) */ /* Disassociate from our current BSS. */ RSU_DPRINTF(sc, RSU_DEBUG_STATE | RSU_DEBUG_FWCMD, "%s: sending disconnect command\n", __func__); return (rsu_fw_cmd(sc, R92S_CMD_DISCONNECT, &zero, sizeof(zero))); } /* * Map the hardware provided RSSI value to a signal level. * For the most part it's just something we divide by and cap * so it doesn't overflow the representation by net80211. */ static int rsu_hwrssi_to_rssi(struct rsu_softc *sc, int hw_rssi) { int v; if (hw_rssi == 0) return (0); v = hw_rssi >> 4; if (v > 80) v = 80; return (v); } static void rsu_event_survey(struct rsu_softc *sc, uint8_t *buf, int len) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_frame *wh; struct ndis_wlan_bssid_ex *bss; struct ieee80211_rx_stats rxs; struct mbuf *m; int pktlen; if (__predict_false(len < sizeof(*bss))) return; bss = (struct ndis_wlan_bssid_ex *)buf; if (__predict_false(len < sizeof(*bss) + le32toh(bss->ieslen))) return; RSU_DPRINTF(sc, RSU_DEBUG_SCAN, "%s: found BSS %s: len=%d chan=%d inframode=%d " "networktype=%d privacy=%d, RSSI=%d\n", __func__, ether_sprintf(bss->macaddr), le32toh(bss->len), le32toh(bss->config.dsconfig), le32toh(bss->inframode), le32toh(bss->networktype), le32toh(bss->privacy), le32toh(bss->rssi)); /* Build a fake beacon frame to let net80211 do all the parsing. */ /* XXX TODO: just call the new scan API methods! */ pktlen = sizeof(*wh) + le32toh(bss->ieslen); if (__predict_false(pktlen > MCLBYTES)) return; m = m_get2(pktlen, M_NOWAIT, MT_DATA, M_PKTHDR); if (__predict_false(m == NULL)) return; wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_BEACON; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; USETW(wh->i_dur, 0); IEEE80211_ADDR_COPY(wh->i_addr1, ieee80211broadcastaddr); IEEE80211_ADDR_COPY(wh->i_addr2, bss->macaddr); IEEE80211_ADDR_COPY(wh->i_addr3, bss->macaddr); *(uint16_t *)wh->i_seq = 0; memcpy(&wh[1], (uint8_t *)&bss[1], le32toh(bss->ieslen)); /* Finalize mbuf. 
*/ m->m_pkthdr.len = m->m_len = pktlen; /* Set channel flags for input path */ bzero(&rxs, sizeof(rxs)); rxs.r_flags |= IEEE80211_R_IEEE | IEEE80211_R_FREQ; rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI; rxs.c_ieee = le32toh(bss->config.dsconfig); rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_2GHZ); /* This is a number from 0..100; so let's just divide it down a bit */ rxs.c_rssi = le32toh(bss->rssi) / 2; rxs.c_nf = -96; if (ieee80211_add_rx_params(m, &rxs) == 0) return; /* XXX avoid a LOR */ RSU_UNLOCK(sc); ieee80211_input_mimo_all(ic, m); RSU_LOCK(sc); } static void rsu_event_join_bss(struct rsu_softc *sc, uint8_t *buf, int len) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; struct r92s_event_join_bss *rsp; uint32_t tmp; int res; if (__predict_false(len < sizeof(*rsp))) return; rsp = (struct r92s_event_join_bss *)buf; res = (int)le32toh(rsp->join_res); RSU_DPRINTF(sc, RSU_DEBUG_STATE | RSU_DEBUG_FWCMD, "%s: Rx join BSS event len=%d res=%d\n", __func__, len, res); /* * XXX Don't do this; there's likely a better way to tell * the caller we failed. */ if (res <= 0) { RSU_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, -1); RSU_LOCK(sc); return; } tmp = le32toh(rsp->associd); if (tmp >= vap->iv_max_aid) { DPRINTF("Assoc ID overflow\n"); tmp = 1; } RSU_DPRINTF(sc, RSU_DEBUG_STATE | RSU_DEBUG_FWCMD, "%s: associated with %s associd=%d\n", __func__, ether_sprintf(rsp->bss.macaddr), tmp); /* XXX is this required? What's the top two bits for again? */ ni->ni_associd = tmp | 0xc000; RSU_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_RUN, IEEE80211_FC0_SUBTYPE_ASSOC_RESP); RSU_LOCK(sc); } static void rsu_event_addba_req_report(struct rsu_softc *sc, uint8_t *buf, int len) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct r92s_add_ba_event *ba = (void *) buf; struct ieee80211_node *ni; if (len < sizeof(*ba)) { device_printf(sc->sc_dev, "%s: short read (%d)\n", __func__, len); return; } if (vap == NULL) return; RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: mac=%s, tid=%d, ssn=%d\n", __func__, ether_sprintf(ba->mac_addr), (int) ba->tid, (int) le16toh(ba->ssn)); /* XXX do node lookup; this is STA specific */ ni = ieee80211_ref_node(vap->iv_bss); ieee80211_ampdu_rx_start_ext(ni, ba->tid, le16toh(ba->ssn) >> 4, 32); ieee80211_free_node(ni); } static void rsu_rx_event(struct rsu_softc *sc, uint8_t code, uint8_t *buf, int len) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); RSU_DPRINTF(sc, RSU_DEBUG_RX | RSU_DEBUG_FWCMD, "%s: Rx event code=%d len=%d\n", __func__, code, len); switch (code) { case R92S_EVT_SURVEY: rsu_event_survey(sc, buf, len); break; case R92S_EVT_SURVEY_DONE: RSU_DPRINTF(sc, RSU_DEBUG_SCAN, "%s: %s scan done, found %d BSS\n", __func__, sc->sc_extra_scan ? "direct" : "broadcast", le32toh(*(uint32_t *)buf)); if (sc->sc_extra_scan == 1) { /* Send broadcast probe request. 
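* The directed (single-SSID) survey has just finished; chase it with one
* broadcast survey so the remaining BSSes are reported before the scan
* is declared done.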
*/ sc->sc_extra_scan = 0; if (vap != NULL && rsu_site_survey(sc, NULL) != 0) { RSU_UNLOCK(sc); ieee80211_cancel_scan(vap); RSU_LOCK(sc); } break; } if (vap != NULL) { RSU_UNLOCK(sc); ieee80211_scan_done(vap); RSU_LOCK(sc); } break; case R92S_EVT_JOIN_BSS: if (vap->iv_state == IEEE80211_S_AUTH) rsu_event_join_bss(sc, buf, len); break; case R92S_EVT_DEL_STA: RSU_DPRINTF(sc, RSU_DEBUG_FWCMD | RSU_DEBUG_STATE, "%s: disassociated from %s\n", __func__, ether_sprintf(buf)); if (vap->iv_state == IEEE80211_S_RUN && IEEE80211_ADDR_EQ(vap->iv_bss->ni_bssid, buf)) { RSU_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, -1); RSU_LOCK(sc); } break; case R92S_EVT_WPS_PBC: RSU_DPRINTF(sc, RSU_DEBUG_RX | RSU_DEBUG_FWCMD, "%s: WPS PBC pushed.\n", __func__); break; case R92S_EVT_FWDBG: buf[60] = '\0'; RSU_DPRINTF(sc, RSU_DEBUG_FWDBG, "FWDBG: %s\n", (char *)buf); break; case R92S_EVT_ADDBA_REQ_REPORT: rsu_event_addba_req_report(sc, buf, len); break; default: device_printf(sc->sc_dev, "%s: unhandled code (%d)\n", __func__, code); break; } } static void rsu_rx_multi_event(struct rsu_softc *sc, uint8_t *buf, int len) { struct r92s_fw_cmd_hdr *cmd; int cmdsz; RSU_DPRINTF(sc, RSU_DEBUG_RX, "%s: Rx events len=%d\n", __func__, len); /* Skip Rx status. */ buf += sizeof(struct r92s_rx_stat); len -= sizeof(struct r92s_rx_stat); /* Process all events. */ for (;;) { /* Check that command header fits. */ if (__predict_false(len < sizeof(*cmd))) break; cmd = (struct r92s_fw_cmd_hdr *)buf; /* Check that command payload fits. */ cmdsz = le16toh(cmd->len); if (__predict_false(len < sizeof(*cmd) + cmdsz)) break; /* Process firmware event. */ rsu_rx_event(sc, cmd->code, (uint8_t *)&cmd[1], cmdsz); if (!(cmd->seq & R92S_FW_CMD_MORE)) break; buf += sizeof(*cmd) + cmdsz; len -= sizeof(*cmd) + cmdsz; } } #if 0 static int8_t rsu_get_rssi(struct rsu_softc *sc, int rate, void *physt) { static const int8_t cckoff[] = { 14, -2, -20, -40 }; struct r92s_rx_phystat *phy; struct r92s_rx_cck *cck; uint8_t rpt; int8_t rssi; if (rate <= 3) { cck = (struct r92s_rx_cck *)physt; rpt = (cck->agc_rpt >> 6) & 0x3; rssi = cck->agc_rpt & 0x3e; rssi = cckoff[rpt] - rssi; } else { /* OFDM/HT. */ phy = (struct r92s_rx_phystat *)physt; rssi = ((le32toh(phy->phydw1) >> 1) & 0x7f) - 106; } return (rssi); } #endif static struct mbuf * rsu_rx_frame(struct rsu_softc *sc, uint8_t *buf, int pktlen) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_frame *wh; struct r92s_rx_stat *stat; uint32_t rxdw0, rxdw3; struct mbuf *m; uint8_t rate; int infosz; stat = (struct r92s_rx_stat *)buf; rxdw0 = le32toh(stat->rxdw0); rxdw3 = le32toh(stat->rxdw3); if (__predict_false(rxdw0 & R92S_RXDW0_CRCERR)) { counter_u64_add(ic->ic_ierrors, 1); return NULL; } if (__predict_false(pktlen < sizeof(*wh) || pktlen > MCLBYTES)) { counter_u64_add(ic->ic_ierrors, 1); return NULL; } rate = MS(rxdw3, R92S_RXDW3_RATE); infosz = MS(rxdw0, R92S_RXDW0_INFOSZ) * 8; #if 0 /* Get RSSI from PHY status descriptor if present. */ if (infosz != 0) *rssi = rsu_get_rssi(sc, rate, &stat[1]); else *rssi = 0; #endif RSU_DPRINTF(sc, RSU_DEBUG_RX, "%s: Rx frame len=%d rate=%d infosz=%d\n", __func__, pktlen, rate, infosz); m = m_get2(pktlen, M_NOWAIT, MT_DATA, M_PKTHDR); if (__predict_false(m == NULL)) { counter_u64_add(ic->ic_ierrors, 1); return NULL; } /* Hardware does Rx TCP checksum offload. 
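* TCPCHKVALID means the report is usable; TCPCHKRPT set on top of that
* means the checksum passed, so the mbuf is flagged CSUM_DATA_VALID.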
*/ if (rxdw3 & R92S_RXDW3_TCPCHKVALID) { if (__predict_true(rxdw3 & R92S_RXDW3_TCPCHKRPT)) m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; } wh = (struct ieee80211_frame *)((uint8_t *)&stat[1] + infosz); memcpy(mtod(m, uint8_t *), wh, pktlen); m->m_pkthdr.len = m->m_len = pktlen; if (ieee80211_radiotap_active(ic)) { struct rsu_rx_radiotap_header *tap = &sc->sc_rxtap; /* Map HW rate index to 802.11 rate. */ tap->wr_flags = 2; if (!(rxdw3 & R92S_RXDW3_HTC)) { switch (rate) { /* CCK. */ case 0: tap->wr_rate = 2; break; case 1: tap->wr_rate = 4; break; case 2: tap->wr_rate = 11; break; case 3: tap->wr_rate = 22; break; /* OFDM. */ case 4: tap->wr_rate = 12; break; case 5: tap->wr_rate = 18; break; case 6: tap->wr_rate = 24; break; case 7: tap->wr_rate = 36; break; case 8: tap->wr_rate = 48; break; case 9: tap->wr_rate = 72; break; case 10: tap->wr_rate = 96; break; case 11: tap->wr_rate = 108; break; } } else if (rate >= 12) { /* MCS0~15. */ /* Bit 7 set means HT MCS instead of rate. */ tap->wr_rate = 0x80 | (rate - 12); } #if 0 tap->wr_dbm_antsignal = *rssi; #endif /* XXX not nice */ tap->wr_dbm_antsignal = rsu_hwrssi_to_rssi(sc, sc->sc_currssi); tap->wr_chan_freq = htole16(ic->ic_curchan->ic_freq); tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags); } return (m); } static struct mbuf * rsu_rx_multi_frame(struct rsu_softc *sc, uint8_t *buf, int len) { struct r92s_rx_stat *stat; uint32_t rxdw0; int totlen, pktlen, infosz, npkts; struct mbuf *m, *m0 = NULL, *prevm = NULL; /* Get the number of encapsulated frames. */ stat = (struct r92s_rx_stat *)buf; npkts = MS(le32toh(stat->rxdw2), R92S_RXDW2_PKTCNT); RSU_DPRINTF(sc, RSU_DEBUG_RX, "%s: Rx %d frames in one chunk\n", __func__, npkts); /* Process all of them. */ while (npkts-- > 0) { if (__predict_false(len < sizeof(*stat))) break; stat = (struct r92s_rx_stat *)buf; rxdw0 = le32toh(stat->rxdw0); pktlen = MS(rxdw0, R92S_RXDW0_PKTLEN); if (__predict_false(pktlen == 0)) break; infosz = MS(rxdw0, R92S_RXDW0_INFOSZ) * 8; /* Make sure everything fits in xfer. */ totlen = sizeof(*stat) + infosz + pktlen; if (__predict_false(totlen > len)) break; /* Process 802.11 frame. */ m = rsu_rx_frame(sc, buf, pktlen); if (m0 == NULL) m0 = m; if (prevm == NULL) prevm = m; else { prevm->m_next = m; prevm = m; } /* Next chunk is 128-byte aligned. */ totlen = (totlen + 127) & ~127; buf += totlen; len -= totlen; } return (m0); } static struct mbuf * rsu_rxeof(struct usb_xfer *xfer, struct rsu_data *data) { struct rsu_softc *sc = data->sc; struct ieee80211com *ic = &sc->sc_ic; struct r92s_rx_stat *stat; int len; usbd_xfer_status(xfer, &len, NULL, NULL, NULL); if (__predict_false(len < sizeof(*stat))) { DPRINTF("xfer too short %d\n", len); counter_u64_add(ic->ic_ierrors, 1); return (NULL); } /* Determine if it is a firmware C2H event or an 802.11 frame. */ stat = (struct r92s_rx_stat *)data->buf; if ((le32toh(stat->rxdw1) & 0x1ff) == 0x1ff) { rsu_rx_multi_event(sc, data->buf, len); /* No packets to process. 
*/ return (NULL); } else return (rsu_rx_multi_frame(sc, data->buf, len)); } static void rsu_bulk_rx_callback(struct usb_xfer *xfer, usb_error_t error) { struct rsu_softc *sc = usbd_xfer_softc(xfer); struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_frame *wh; struct ieee80211_node *ni; struct mbuf *m = NULL, *next; struct rsu_data *data; RSU_ASSERT_LOCKED(sc); switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: data = STAILQ_FIRST(&sc->sc_rx_active); if (data == NULL) goto tr_setup; STAILQ_REMOVE_HEAD(&sc->sc_rx_active, next); m = rsu_rxeof(xfer, data); STAILQ_INSERT_TAIL(&sc->sc_rx_inactive, data, next); /* FALLTHROUGH */ case USB_ST_SETUP: tr_setup: /* * XXX TODO: if we have an mbuf list, but then * we hit data == NULL, what now? */ data = STAILQ_FIRST(&sc->sc_rx_inactive); if (data == NULL) { KASSERT(m == NULL, ("mbuf isn't NULL")); return; } STAILQ_REMOVE_HEAD(&sc->sc_rx_inactive, next); STAILQ_INSERT_TAIL(&sc->sc_rx_active, data, next); usbd_xfer_set_frame_data(xfer, 0, data->buf, usbd_xfer_max_len(xfer)); usbd_transfer_submit(xfer); /* * To avoid a LOR, unlock our private mutex here before calling * ieee80211_input(); we are at the end of the USB callback, where * it is safe to unlock. */ RSU_UNLOCK(sc); while (m != NULL) { int rssi; /* Cheat and get the last calibrated RSSI */ rssi = rsu_hwrssi_to_rssi(sc, sc->sc_currssi); next = m->m_next; m->m_next = NULL; wh = mtod(m, struct ieee80211_frame *); ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); if (ni != NULL) { if (ni->ni_flags & IEEE80211_NODE_HT) m->m_flags |= M_AMPDU; (void)ieee80211_input(ni, m, rssi, -96); ieee80211_free_node(ni); } else (void)ieee80211_input_all(ic, m, rssi, -96); m = next; } RSU_LOCK(sc); break; default: /* Return the buffer to the inactive queue due to an error. */ data = STAILQ_FIRST(&sc->sc_rx_active); if (data != NULL) { STAILQ_REMOVE_HEAD(&sc->sc_rx_active, next); STAILQ_INSERT_TAIL(&sc->sc_rx_inactive, data, next); } if (error != USB_ERR_CANCELLED) { usbd_xfer_set_stall(xfer); counter_u64_add(ic->ic_ierrors, 1); goto tr_setup; } break; } } static void rsu_txeof(struct usb_xfer *xfer, struct rsu_data *data) { #ifdef USB_DEBUG struct rsu_softc *sc = usbd_xfer_softc(xfer); #endif RSU_DPRINTF(sc, RSU_DEBUG_TXDONE, "%s: called; data=%p\n", __func__, data); if (data->m) { /* XXX status?
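* ieee80211_tx_complete() takes a success/failure status as its last
* argument; we always pass 0 (success) here since the bulk TX callback
* gives us no per-frame completion status.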
*/ ieee80211_tx_complete(data->ni, data->m, 0); data->m = NULL; data->ni = NULL; } } static void rsu_bulk_tx_callback_sub(struct usb_xfer *xfer, usb_error_t error, uint8_t which) { struct rsu_softc *sc = usbd_xfer_softc(xfer); struct ieee80211com *ic = &sc->sc_ic; struct rsu_data *data; RSU_ASSERT_LOCKED(sc); switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: data = STAILQ_FIRST(&sc->sc_tx_active[which]); if (data == NULL) goto tr_setup; RSU_DPRINTF(sc, RSU_DEBUG_TXDONE, "%s: transfer done %p\n", __func__, data); STAILQ_REMOVE_HEAD(&sc->sc_tx_active[which], next); rsu_txeof(xfer, data); rsu_freebuf(sc, data); /* FALLTHROUGH */ case USB_ST_SETUP: tr_setup: data = STAILQ_FIRST(&sc->sc_tx_pending[which]); if (data == NULL) { RSU_DPRINTF(sc, RSU_DEBUG_TXDONE, "%s: empty pending queue sc %p\n", __func__, sc); return; } STAILQ_REMOVE_HEAD(&sc->sc_tx_pending[which], next); STAILQ_INSERT_TAIL(&sc->sc_tx_active[which], data, next); usbd_xfer_set_frame_data(xfer, 0, data->buf, data->buflen); RSU_DPRINTF(sc, RSU_DEBUG_TXDONE, "%s: submitting transfer %p\n", __func__, data); usbd_transfer_submit(xfer); break; default: data = STAILQ_FIRST(&sc->sc_tx_active[which]); if (data != NULL) { STAILQ_REMOVE_HEAD(&sc->sc_tx_active[which], next); rsu_txeof(xfer, data); rsu_freebuf(sc, data); } counter_u64_add(ic->ic_oerrors, 1); if (error != USB_ERR_CANCELLED) { usbd_xfer_set_stall(xfer); goto tr_setup; } break; } /* * XXX TODO: if the queue is low, flush out FF TX frames. * Remember to unlock the driver for now; net80211 doesn't * defer it for us. */ } static void rsu_bulk_tx_callback_be_bk(struct usb_xfer *xfer, usb_error_t error) { struct rsu_softc *sc = usbd_xfer_softc(xfer); rsu_bulk_tx_callback_sub(xfer, error, RSU_BULK_TX_BE_BK); /* This kicks the TX taskqueue */ rsu_start(sc); } static void rsu_bulk_tx_callback_vi_vo(struct usb_xfer *xfer, usb_error_t error) { struct rsu_softc *sc = usbd_xfer_softc(xfer); rsu_bulk_tx_callback_sub(xfer, error, RSU_BULK_TX_VI_VO); /* This kicks the TX taskqueue */ rsu_start(sc); } static void rsu_bulk_tx_callback_h2c(struct usb_xfer *xfer, usb_error_t error) { struct rsu_softc *sc = usbd_xfer_softc(xfer); rsu_bulk_tx_callback_sub(xfer, error, RSU_BULK_TX_H2C); /* This kicks the TX taskqueue */ rsu_start(sc); } /* * Transmit the given frame. * * This doesn't free the node or mbuf upon failure. */ static int rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, struct mbuf *m0, struct rsu_data *data) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct r92s_tx_desc *txd; uint8_t type; int prio = 0; uint8_t which; int hasqos; int xferlen; int qid; RSU_ASSERT_LOCKED(sc); wh = mtod(m0, struct ieee80211_frame *); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: data=%p, m=%p\n", __func__, data, m0); if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_encap(ni, m0); if (k == NULL) { device_printf(sc->sc_dev, "ieee80211_crypto_encap returns NULL.\n"); /* XXX we don't expect the fragmented frames */ return (ENOBUFS); } wh = mtod(m0, struct ieee80211_frame *); } /* If we have QoS then use it */ /* XXX TODO: mbuf WME/PRI versus TID? */ if (IEEE80211_QOS_HAS_SEQ(wh)) { /* Has QoS */ prio = M_WME_GETAC(m0); which = rsu_wme_ac_xfer_map[prio]; hasqos = 1; } else { /* Non-QoS TID */ /* XXX TODO: tid=0 for non-qos TID? 
*/ which = rsu_wme_ac_xfer_map[WME_AC_BE]; hasqos = 0; prio = 0; } qid = rsu_ac2qid[prio]; #if 0 switch (type) { case IEEE80211_FC0_TYPE_CTL: case IEEE80211_FC0_TYPE_MGT: which = rsu_wme_ac_xfer_map[WME_AC_VO]; break; default: which = rsu_wme_ac_xfer_map[M_WME_GETAC(m0)]; break; } hasqos = 0; #endif RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: pri=%d, which=%d, hasqos=%d\n", __func__, prio, which, hasqos); /* Fill Tx descriptor. */ txd = (struct r92s_tx_desc *)data->buf; memset(txd, 0, sizeof(*txd)); txd->txdw0 |= htole32( SM(R92S_TXDW0_PKTLEN, m0->m_pkthdr.len) | SM(R92S_TXDW0_OFFSET, sizeof(*txd)) | R92S_TXDW0_OWN | R92S_TXDW0_FSG | R92S_TXDW0_LSG); txd->txdw1 |= htole32( SM(R92S_TXDW1_MACID, R92S_MACID_BSS) | SM(R92S_TXDW1_QSEL, qid)); if (!hasqos) txd->txdw1 |= htole32(R92S_TXDW1_NONQOS); #ifdef notyet if (k != NULL) { switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_WEP: cipher = R92S_TXDW1_CIPHER_WEP; break; case IEEE80211_CIPHER_TKIP: cipher = R92S_TXDW1_CIPHER_TKIP; break; case IEEE80211_CIPHER_AES_CCM: cipher = R92S_TXDW1_CIPHER_AES; break; default: cipher = R92S_TXDW1_CIPHER_NONE; } txd->txdw1 |= htole32( SM(R92S_TXDW1_CIPHER, cipher) | SM(R92S_TXDW1_KEYIDX, k->k_id)); } #endif /* XXX todo: set AGGEN bit if appropriate? */ txd->txdw2 |= htole32(R92S_TXDW2_BK); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) txd->txdw2 |= htole32(R92S_TXDW2_BMCAST); /* * Firmware will use and increment the sequence number for the * specified priority. */ txd->txdw3 |= htole32(SM(R92S_TXDW3_SEQ, prio)); if (ieee80211_radiotap_active_vap(vap)) { struct rsu_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_chan_freq = htole16(ic->ic_curchan->ic_freq); tap->wt_chan_flags = htole16(ic->ic_curchan->ic_flags); ieee80211_radiotap_tx(vap, m0); } xferlen = sizeof(*txd) + m0->m_pkthdr.len; m_copydata(m0, 0, m0->m_pkthdr.len, (caddr_t)&txd[1]); data->buflen = xferlen; data->ni = ni; data->m = m0; STAILQ_INSERT_TAIL(&sc->sc_tx_pending[which], data, next); /* start transfer, if any */ usbd_transfer_start(sc->sc_xfer[which]); return (0); } static int rsu_transmit(struct ieee80211com *ic, struct mbuf *m) { struct rsu_softc *sc = ic->ic_softc; int error; RSU_LOCK(sc); if (!sc->sc_running) { RSU_UNLOCK(sc); return (ENXIO); } /* * XXX TODO: ensure that we treat 'm' as a list of frames * to transmit! 
*/ error = mbufq_enqueue(&sc->sc_snd, m); if (error) { RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: mbufq_enqueue failed (%d)\n", __func__, error); RSU_UNLOCK(sc); return (error); } RSU_UNLOCK(sc); /* This kicks the TX taskqueue */ rsu_start(sc); return (0); } static void rsu_drain_mbufq(struct rsu_softc *sc) { struct mbuf *m; struct ieee80211_node *ni; RSU_ASSERT_LOCKED(sc); while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; m->m_pkthdr.rcvif = NULL; ieee80211_free_node(ni); m_freem(m); } } static void _rsu_start(struct rsu_softc *sc) { struct ieee80211_node *ni; struct rsu_data *bf; struct mbuf *m; RSU_ASSERT_LOCKED(sc); while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) { bf = rsu_getbuf(sc); if (bf == NULL) { RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: failed to get buffer\n", __func__); mbufq_prepend(&sc->sc_snd, m); break; } ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; m->m_pkthdr.rcvif = NULL; if (rsu_tx_start(sc, ni, m, bf) != 0) { RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: failed to transmit\n", __func__); if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); rsu_freebuf(sc, bf); ieee80211_free_node(ni); m_freem(m); break; } } } static void rsu_start(struct rsu_softc *sc) { taskqueue_enqueue(taskqueue_thread, &sc->tx_task); } static void rsu_parent(struct ieee80211com *ic) { struct rsu_softc *sc = ic->ic_softc; int startall = 0; RSU_LOCK(sc); if (ic->ic_nrunning > 0) { if (!sc->sc_running) { rsu_init(sc); startall = 1; } } else if (sc->sc_running) rsu_stop(sc); RSU_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } /* * Power on sequence for A-cut adapters. */ static void rsu_power_on_acut(struct rsu_softc *sc) { uint32_t reg; rsu_write_1(sc, R92S_SPS0_CTRL + 1, 0x53); rsu_write_1(sc, R92S_SPS0_CTRL + 0, 0x57); /* Enable AFE macro block's bandgap and Mbias. */ rsu_write_1(sc, R92S_AFE_MISC, rsu_read_1(sc, R92S_AFE_MISC) | R92S_AFE_MISC_BGEN | R92S_AFE_MISC_MBEN); /* Enable LDOA15 block. */ rsu_write_1(sc, R92S_LDOA15_CTRL, rsu_read_1(sc, R92S_LDOA15_CTRL) | R92S_LDA15_EN); rsu_write_1(sc, R92S_SPS1_CTRL, rsu_read_1(sc, R92S_SPS1_CTRL) | R92S_SPS1_LDEN); rsu_ms_delay(sc, 2000); /* Enable switch regulator block. */ rsu_write_1(sc, R92S_SPS1_CTRL, rsu_read_1(sc, R92S_SPS1_CTRL) | R92S_SPS1_SWEN); rsu_write_4(sc, R92S_SPS1_CTRL, 0x00a7b267); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, rsu_read_1(sc, R92S_SYS_ISO_CTRL + 1) | 0x08); rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x20); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, rsu_read_1(sc, R92S_SYS_ISO_CTRL + 1) & ~0x90); /* Enable AFE clock. */ rsu_write_1(sc, R92S_AFE_XTAL_CTRL + 1, rsu_read_1(sc, R92S_AFE_XTAL_CTRL + 1) & ~0x04); /* Enable AFE PLL macro block. */ rsu_write_1(sc, R92S_AFE_PLL_CTRL, rsu_read_1(sc, R92S_AFE_PLL_CTRL) | 0x11); /* Attach AFE PLL to MACTOP/BB. */ rsu_write_1(sc, R92S_SYS_ISO_CTRL, rsu_read_1(sc, R92S_SYS_ISO_CTRL) & ~0x11); /* Switch to 40MHz clock instead of 80MHz. */ rsu_write_2(sc, R92S_SYS_CLKR, rsu_read_2(sc, R92S_SYS_CLKR) & ~R92S_SYS_CLKSEL); /* Enable MAC clock. */ rsu_write_2(sc, R92S_SYS_CLKR, rsu_read_2(sc, R92S_SYS_CLKR) | R92S_MAC_CLK_EN | R92S_SYS_CLK_EN); rsu_write_1(sc, R92S_PMC_FSM, 0x02); /* Enable digital core and IOREG R/W. */ rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x08); rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x80); /* Switch the control path to firmware.
*/ reg = rsu_read_2(sc, R92S_SYS_CLKR); reg = (reg & ~R92S_SWHW_SEL) | R92S_FWHW_SEL; rsu_write_2(sc, R92S_SYS_CLKR, reg); rsu_write_2(sc, R92S_CR, 0x37fc); /* Fix USB RX FIFO issue. */ rsu_write_1(sc, 0xfe5c, rsu_read_1(sc, 0xfe5c) | 0x80); rsu_write_1(sc, 0x00ab, rsu_read_1(sc, 0x00ab) | 0xc0); rsu_write_1(sc, R92S_SYS_CLKR, rsu_read_1(sc, R92S_SYS_CLKR) & ~R92S_SYS_CPU_CLKSEL); } /* * Power on sequence for B-cut and C-cut adapters. */ static void rsu_power_on_bcut(struct rsu_softc *sc) { uint32_t reg; int ntries; /* Prevent eFuse leakage. */ rsu_write_1(sc, 0x37, 0xb0); rsu_ms_delay(sc, 10); rsu_write_1(sc, 0x37, 0x30); /* Switch the control path to hardware. */ reg = rsu_read_2(sc, R92S_SYS_CLKR); if (reg & R92S_FWHW_SEL) { rsu_write_2(sc, R92S_SYS_CLKR, reg & ~(R92S_SWHW_SEL | R92S_FWHW_SEL)); } rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) & ~0x8c); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_SPS0_CTRL + 1, 0x53); rsu_write_1(sc, R92S_SPS0_CTRL + 0, 0x57); reg = rsu_read_1(sc, R92S_AFE_MISC); rsu_write_1(sc, R92S_AFE_MISC, reg | R92S_AFE_MISC_BGEN); rsu_write_1(sc, R92S_AFE_MISC, reg | R92S_AFE_MISC_BGEN | R92S_AFE_MISC_MBEN | R92S_AFE_MISC_I32_EN); /* Enable PLL. */ rsu_write_1(sc, R92S_LDOA15_CTRL, rsu_read_1(sc, R92S_LDOA15_CTRL) | R92S_LDA15_EN); rsu_write_1(sc, R92S_LDOV12D_CTRL, rsu_read_1(sc, R92S_LDOV12D_CTRL) | R92S_LDV12_EN); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, rsu_read_1(sc, R92S_SYS_ISO_CTRL + 1) | 0x08); rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x20); /* Support 64KB IMEM. */ rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, rsu_read_1(sc, R92S_SYS_ISO_CTRL + 1) & ~0x97); /* Enable AFE clock. */ rsu_write_1(sc, R92S_AFE_XTAL_CTRL + 1, rsu_read_1(sc, R92S_AFE_XTAL_CTRL + 1) & ~0x04); /* Enable AFE PLL macro block. */ reg = rsu_read_1(sc, R92S_AFE_PLL_CTRL); rsu_write_1(sc, R92S_AFE_PLL_CTRL, reg | 0x11); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_AFE_PLL_CTRL, reg | 0x51); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_AFE_PLL_CTRL, reg | 0x11); rsu_ms_delay(sc, 1); /* Attach AFE PLL to MACTOP/BB. */ rsu_write_1(sc, R92S_SYS_ISO_CTRL, rsu_read_1(sc, R92S_SYS_ISO_CTRL) & ~0x11); /* Switch to 40MHz clock. */ rsu_write_1(sc, R92S_SYS_CLKR, 0x00); /* Disable CPU clock and 80MHz SSC. */ rsu_write_1(sc, R92S_SYS_CLKR, rsu_read_1(sc, R92S_SYS_CLKR) | 0xa0); /* Enable MAC clock. */ rsu_write_2(sc, R92S_SYS_CLKR, rsu_read_2(sc, R92S_SYS_CLKR) | R92S_MAC_CLK_EN | R92S_SYS_CLK_EN); rsu_write_1(sc, R92S_PMC_FSM, 0x02); /* Enable digital core and IOREG R/W. */ rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x08); rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x80); /* Switch the control path to firmware. */ reg = rsu_read_2(sc, R92S_SYS_CLKR); reg = (reg & ~R92S_SWHW_SEL) | R92S_FWHW_SEL; rsu_write_2(sc, R92S_SYS_CLKR, reg); rsu_write_2(sc, R92S_CR, 0x37fc); /* Fix USB RX FIFO issue. */ rsu_write_1(sc, 0xfe5c, rsu_read_1(sc, 0xfe5c) | 0x80); rsu_write_1(sc, R92S_SYS_CLKR, rsu_read_1(sc, R92S_SYS_CLKR) & ~R92S_SYS_CPU_CLKSEL); rsu_write_1(sc, 0xfe1c, 0x80); /* Make sure TxDMA is ready to download firmware. */ for (ntries = 0; ntries < 20; ntries++) { reg = rsu_read_1(sc, R92S_TCR); if ((reg & (R92S_TCR_IMEM_CHK_RPT | R92S_TCR_EMEM_CHK_RPT)) == (R92S_TCR_IMEM_CHK_RPT | R92S_TCR_EMEM_CHK_RPT)) break; rsu_ms_delay(sc, 1); } if (ntries == 20) { RSU_DPRINTF(sc, RSU_DEBUG_RESET | RSU_DEBUG_TX, "%s: TxDMA is not ready\n", __func__); /* Reset TxDMA. 
*/ reg = rsu_read_1(sc, R92S_CR); rsu_write_1(sc, R92S_CR, reg & ~R92S_CR_TXDMA_EN); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_CR, reg | R92S_CR_TXDMA_EN); } } static void rsu_power_off(struct rsu_softc *sc) { /* Turn RF off. */ rsu_write_1(sc, R92S_RF_CTRL, 0x00); rsu_ms_delay(sc, 5); /* Turn MAC off. */ /* Switch control path. */ rsu_write_1(sc, R92S_SYS_CLKR + 1, 0x38); /* Reset MACTOP. */ rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, 0x70); rsu_write_1(sc, R92S_PMC_FSM, 0x06); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 0, 0xf9); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, 0xe8); /* Disable AFE PLL. */ rsu_write_1(sc, R92S_AFE_PLL_CTRL, 0x00); /* Disable A15V. */ rsu_write_1(sc, R92S_LDOA15_CTRL, 0x54); /* Disable eFuse 1.2V. */ rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, 0x50); rsu_write_1(sc, R92S_LDOV12D_CTRL, 0x24); /* Enable AFE macro block's bandgap and Mbias. */ rsu_write_1(sc, R92S_AFE_MISC, 0x30); /* Disable 1.6V LDO. */ rsu_write_1(sc, R92S_SPS0_CTRL + 0, 0x56); rsu_write_1(sc, R92S_SPS0_CTRL + 1, 0x43); /* Firmware - tell it to switch things off */ (void) rsu_set_fw_power_state(sc, RSU_PWR_OFF); } static int rsu_fw_loadsection(struct rsu_softc *sc, const uint8_t *buf, int len) { const uint8_t which = rsu_wme_ac_xfer_map[WME_AC_VO]; struct rsu_data *data; struct r92s_tx_desc *txd; int mlen; while (len > 0) { data = rsu_getbuf(sc); if (data == NULL) return (ENOMEM); txd = (struct r92s_tx_desc *)data->buf; memset(txd, 0, sizeof(*txd)); if (len <= RSU_TXBUFSZ - sizeof(*txd)) { /* Last chunk. */ txd->txdw0 |= htole32(R92S_TXDW0_LINIP); mlen = len; } else mlen = RSU_TXBUFSZ - sizeof(*txd); txd->txdw0 |= htole32(SM(R92S_TXDW0_PKTLEN, mlen)); memcpy(&txd[1], buf, mlen); data->buflen = sizeof(*txd) + mlen; RSU_DPRINTF(sc, RSU_DEBUG_TX | RSU_DEBUG_FW | RSU_DEBUG_RESET, "%s: starting transfer %p\n", __func__, data); STAILQ_INSERT_TAIL(&sc->sc_tx_pending[which], data, next); buf += mlen; len -= mlen; } usbd_transfer_start(sc->sc_xfer[which]); return (0); } static int rsu_load_firmware(struct rsu_softc *sc) { const struct r92s_fw_hdr *hdr; struct r92s_fw_priv *dmem; struct ieee80211com *ic = &sc->sc_ic; const uint8_t *imem, *emem; int imemsz, ememsz; const struct firmware *fw; size_t size; uint32_t reg; int ntries, error; if (rsu_read_1(sc, R92S_TCR) & R92S_TCR_FWRDY) { RSU_DPRINTF(sc, RSU_DEBUG_ANY, "%s: Firmware already loaded\n", __func__); return (0); } RSU_UNLOCK(sc); /* Read firmware image from the filesystem. */ if ((fw = firmware_get("rsu-rtl8712fw")) == NULL) { device_printf(sc->sc_dev, "%s: failed to load firmware file rsu-rtl8712fw\n", __func__); RSU_LOCK(sc); return (ENXIO); } RSU_LOCK(sc); size = fw->datasize; if (size < sizeof(*hdr)) { device_printf(sc->sc_dev, "firmware too short\n"); error = EINVAL; goto fail; } hdr = (const struct r92s_fw_hdr *)fw->data; if (hdr->signature != htole16(0x8712) && hdr->signature != htole16(0x8192)) { device_printf(sc->sc_dev, "invalid firmware signature 0x%x\n", le16toh(hdr->signature)); error = EINVAL; goto fail; } DPRINTF("FW V%d %02x-%02x %02x:%02x\n", le16toh(hdr->version), hdr->month, hdr->day, hdr->hour, hdr->minute); /* Make sure that driver and firmware are in sync. */ if (hdr->privsz != htole32(sizeof(*dmem))) { device_printf(sc->sc_dev, "unsupported firmware image\n"); error = EINVAL; goto fail; } /* Get FW section sizes. */ imemsz = le32toh(hdr->imemsz); ememsz = le32toh(hdr->sramsz); /* Check that all FW sections fit in image.
*/ if (size < sizeof(*hdr) + imemsz + ememsz) { device_printf(sc->sc_dev, "firmware too short\n"); error = EINVAL; goto fail; } imem = (const uint8_t *)&hdr[1]; emem = imem + imemsz; /* Load IMEM section. */ error = rsu_fw_loadsection(sc, imem, imemsz); if (error != 0) { device_printf(sc->sc_dev, "could not load firmware section %s\n", "IMEM"); goto fail; } /* Wait for load to complete. */ for (ntries = 0; ntries != 50; ntries++) { rsu_ms_delay(sc, 10); reg = rsu_read_1(sc, R92S_TCR); if (reg & R92S_TCR_IMEM_CODE_DONE) break; } if (ntries == 50) { device_printf(sc->sc_dev, "timeout waiting for IMEM transfer\n"); error = ETIMEDOUT; goto fail; } /* Load EMEM section. */ error = rsu_fw_loadsection(sc, emem, ememsz); if (error != 0) { device_printf(sc->sc_dev, "could not load firmware section %s\n", "EMEM"); goto fail; } /* Wait for load to complete. */ for (ntries = 0; ntries != 50; ntries++) { rsu_ms_delay(sc, 10); reg = rsu_read_2(sc, R92S_TCR); if (reg & R92S_TCR_EMEM_CODE_DONE) break; } if (ntries == 50) { device_printf(sc->sc_dev, "timeout waiting for EMEM transfer\n"); error = ETIMEDOUT; goto fail; } /* Enable CPU. */ rsu_write_1(sc, R92S_SYS_CLKR, rsu_read_1(sc, R92S_SYS_CLKR) | R92S_SYS_CPU_CLKSEL); if (!(rsu_read_1(sc, R92S_SYS_CLKR) & R92S_SYS_CPU_CLKSEL)) { device_printf(sc->sc_dev, "could not enable system clock\n"); error = EIO; goto fail; } rsu_write_2(sc, R92S_SYS_FUNC_EN, rsu_read_2(sc, R92S_SYS_FUNC_EN) | R92S_FEN_CPUEN); if (!(rsu_read_2(sc, R92S_SYS_FUNC_EN) & R92S_FEN_CPUEN)) { device_printf(sc->sc_dev, "could not enable microcontroller\n"); error = EIO; goto fail; } /* Wait for CPU to initialize. */ for (ntries = 0; ntries < 100; ntries++) { if (rsu_read_1(sc, R92S_TCR) & R92S_TCR_IMEM_RDY) break; rsu_ms_delay(sc, 1); } if (ntries == 100) { device_printf(sc->sc_dev, "timeout waiting for microcontroller\n"); error = ETIMEDOUT; goto fail; } /* Update DMEM section before loading. */ dmem = __DECONST(struct r92s_fw_priv *, &hdr->priv); memset(dmem, 0, sizeof(*dmem)); dmem->hci_sel = R92S_HCI_SEL_USB | R92S_HCI_SEL_8172; dmem->nendpoints = sc->sc_nendpoints; dmem->chip_version = sc->cut; dmem->rf_config = sc->sc_rftype; dmem->vcs_type = R92S_VCS_TYPE_AUTO; dmem->vcs_mode = R92S_VCS_MODE_RTS_CTS; dmem->turbo_mode = 0; dmem->bw40_en = !! (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40); dmem->amsdu2ampdu_en = !! (sc->sc_ht); dmem->ampdu_en = !! (sc->sc_ht); dmem->agg_offload = !! (sc->sc_ht); dmem->qos_en = 1; dmem->ps_offload = 1; dmem->lowpower_mode = 1; /* XXX TODO: configurable? */ /* Load DMEM section. */ error = rsu_fw_loadsection(sc, (uint8_t *)dmem, sizeof(*dmem)); if (error != 0) { device_printf(sc->sc_dev, "could not load firmware section %s\n", "DMEM"); goto fail; } /* Wait for load to complete. */ for (ntries = 0; ntries < 100; ntries++) { if (rsu_read_1(sc, R92S_TCR) & R92S_TCR_DMEM_CODE_DONE) break; rsu_ms_delay(sc, 1); } if (ntries == 100) { device_printf(sc->sc_dev, "timeout waiting for %s transfer\n", "DMEM"); error = ETIMEDOUT; goto fail; } /* Wait for firmware readiness. 
*/ for (ntries = 0; ntries < 60; ntries++) { if (!(rsu_read_1(sc, R92S_TCR) & R92S_TCR_FWRDY)) break; rsu_ms_delay(sc, 1); } if (ntries == 60) { device_printf(sc->sc_dev, "timeout waiting for firmware readiness\n"); error = ETIMEDOUT; goto fail; } fail: firmware_put(fw, FIRMWARE_UNLOAD); return (error); } static int rsu_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct rsu_softc *sc = ic->ic_softc; struct rsu_data *bf; /* prevent management frames from being sent if we're not ready */ if (!sc->sc_running) { m_freem(m); return (ENETDOWN); } RSU_LOCK(sc); bf = rsu_getbuf(sc); if (bf == NULL) { m_freem(m); RSU_UNLOCK(sc); return (ENOBUFS); } if (rsu_tx_start(sc, ni, m, bf) != 0) { m_freem(m); rsu_freebuf(sc, bf); RSU_UNLOCK(sc); return (EIO); } RSU_UNLOCK(sc); return (0); } static void rsu_init(struct rsu_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint8_t macaddr[IEEE80211_ADDR_LEN]; int error; int i; RSU_ASSERT_LOCKED(sc); /* Ensure the mbuf queue is drained */ rsu_drain_mbufq(sc); /* Init host async commands ring. */ sc->cmdq.cur = sc->cmdq.next = sc->cmdq.queued = 0; /* Reset power management state. */ rsu_write_1(sc, R92S_USB_HRPWM, 0); /* Power on adapter. */ if (sc->cut == 1) rsu_power_on_acut(sc); else rsu_power_on_bcut(sc); /* Load firmware. */ error = rsu_load_firmware(sc); if (error != 0) goto fail; /* Enable Rx TCP checksum offload. */ rsu_write_4(sc, R92S_RCR, rsu_read_4(sc, R92S_RCR) | 0x04000000); /* Append PHY status. */ rsu_write_4(sc, R92S_RCR, rsu_read_4(sc, R92S_RCR) | 0x02000000); rsu_write_4(sc, R92S_CR, rsu_read_4(sc, R92S_CR) & ~0xff000000); /* Use 128 bytes pages. */ rsu_write_1(sc, 0x00b5, rsu_read_1(sc, 0x00b5) | 0x01); /* Enable USB Rx aggregation. */ rsu_write_1(sc, 0x00bd, rsu_read_1(sc, 0x00bd) | 0x80); /* Set USB Rx aggregation threshold. */ rsu_write_1(sc, 0x00d9, 0x01); /* Set USB Rx aggregation timeout (1.7ms/4). */ rsu_write_1(sc, 0xfe5b, 0x04); /* Fix USB Rx FIFO issue. */ rsu_write_1(sc, 0xfe5c, rsu_read_1(sc, 0xfe5c) | 0x80); /* Set MAC address. */ IEEE80211_ADDR_COPY(macaddr, vap ? vap->iv_myaddr : ic->ic_macaddr); rsu_write_region_1(sc, R92S_MACID, macaddr, IEEE80211_ADDR_LEN); /* It really takes 1.5 seconds for the firmware to boot: */ rsu_ms_delay(sc, 2000); RSU_DPRINTF(sc, RSU_DEBUG_RESET, "%s: setting MAC address to %s\n", __func__, ether_sprintf(macaddr)); error = rsu_fw_cmd(sc, R92S_CMD_SET_MAC_ADDRESS, macaddr, IEEE80211_ADDR_LEN); if (error != 0) { device_printf(sc->sc_dev, "could not set MAC address\n"); goto fail; } + + /* Setup multicast filter (must be done after firmware loading). */ + rsu_set_multi(sc); /* Set PS mode fully active */ error = rsu_set_fw_power_state(sc, RSU_PWR_ACTIVE); if (error != 0) { device_printf(sc->sc_dev, "could not set PS mode\n"); goto fail; } sc->sc_extra_scan = 0; usbd_transfer_start(sc->sc_xfer[RSU_BULK_RX]); /* We're ready to go. */ sc->sc_running = 1; return; fail: /* Need to stop all failed transfers, if any */ for (i = 0; i != RSU_N_TRANSFER; i++) usbd_transfer_stop(sc->sc_xfer[i]); } static void rsu_stop(struct rsu_softc *sc) { int i; RSU_ASSERT_LOCKED(sc); sc->sc_running = 0; sc->sc_calibrating = 0; taskqueue_cancel_timeout(taskqueue_thread, &sc->calib_task, NULL); taskqueue_cancel(taskqueue_thread, &sc->tx_task, NULL); /* Power off adapter. 
*/ rsu_power_off(sc); for (i = 0; i < RSU_N_TRANSFER; i++) usbd_transfer_stop(sc->sc_xfer[i]); /* Ensure the mbuf queue is drained */ rsu_drain_mbufq(sc); } /* * Note: usb_pause_mtx() actually releases the mutex before calling pause(), * which breaks any kind of driver serialisation. */ static void rsu_ms_delay(struct rsu_softc *sc, int ms) { //usb_pause_mtx(&sc->sc_mtx, hz / 1000); DELAY(ms * 1000); } Index: projects/clang391-import/sys/dev/usb/wlan/if_rsureg.h =================================================================== --- projects/clang391-import/sys/dev/usb/wlan/if_rsureg.h (revision 309262) +++ projects/clang391-import/sys/dev/usb/wlan/if_rsureg.h (revision 309263) @@ -1,802 +1,803 @@ /*- * Copyright (c) 2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $OpenBSD: if_rsureg.h,v 1.3 2013/04/15 09:23:01 mglocker Exp $ * $FreeBSD$ */ /* USB Requests. */ #define R92S_REQ_REGS 0x05 /* * MAC registers. */ #define R92S_SYSCFG 0x0000 #define R92S_SYS_ISO_CTRL (R92S_SYSCFG + 0x000) #define R92S_SYS_FUNC_EN (R92S_SYSCFG + 0x002) #define R92S_PMC_FSM (R92S_SYSCFG + 0x004) #define R92S_SYS_CLKR (R92S_SYSCFG + 0x008) #define R92S_EE_9346CR (R92S_SYSCFG + 0x00a) #define R92S_AFE_MISC (R92S_SYSCFG + 0x010) #define R92S_SPS0_CTRL (R92S_SYSCFG + 0x011) #define R92S_SPS1_CTRL (R92S_SYSCFG + 0x018) #define R92S_RF_CTRL (R92S_SYSCFG + 0x01f) #define R92S_LDOA15_CTRL (R92S_SYSCFG + 0x020) #define R92S_LDOV12D_CTRL (R92S_SYSCFG + 0x021) #define R92S_AFE_XTAL_CTRL (R92S_SYSCFG + 0x026) #define R92S_AFE_PLL_CTRL (R92S_SYSCFG + 0x028) #define R92S_EFUSE_CTRL (R92S_SYSCFG + 0x030) #define R92S_EFUSE_TEST (R92S_SYSCFG + 0x034) #define R92S_EFUSE_CLK_CTRL (R92S_SYSCFG + 0x2f8) #define R92S_CMDCTRL 0x0040 #define R92S_CR (R92S_CMDCTRL + 0x000) #define R92S_TCR (R92S_CMDCTRL + 0x004) #define R92S_RCR (R92S_CMDCTRL + 0x008) #define R92S_MACIDSETTING 0x0050 #define R92S_MACID (R92S_MACIDSETTING + 0x000) +#define R92S_MAR (R92S_MACIDSETTING + 0x010) #define R92S_GP 0x01e0 #define R92S_GPIO_CTRL (R92S_GP + 0x00c) #define R92S_GPIO_IO_SEL (R92S_GP + 0x00e) #define R92S_MAC_PINMUX_CTRL (R92S_GP + 0x011) #define R92S_IOCMD_CTRL 0x0370 #define R92S_IOCMD_DATA 0x0374 #define R92S_USB_HRPWM 0xfe58 /* Bits for R92S_SYS_FUNC_EN. */ #define R92S_FEN_CPUEN 0x0400 /* Bits for R92S_PMC_FSM. */ #define R92S_PMC_FSM_CUT_M 0x000f8000 #define R92S_PMC_FSM_CUT_S 15 /* Bits for R92S_SYS_CLKR. */ #define R92S_SYS_CLKSEL 0x0001 #define R92S_SYS_PS_CLKSEL 0x0002 #define R92S_SYS_CPU_CLKSEL 0x0004 #define R92S_MAC_CLK_EN 0x0800 #define R92S_SYS_CLK_EN 0x1000 #define R92S_SWHW_SEL 0x4000 #define R92S_FWHW_SEL 0x8000 /* Bits for R92S_EE_9346CR. */ #define R92S_9356SEL 0x10 #define R92S_EEPROM_EN 0x20 /* Bits for R92S_AFE_MISC. */ #define R92S_AFE_MISC_BGEN 0x01 #define R92S_AFE_MISC_MBEN 0x02 #define R92S_AFE_MISC_I32_EN 0x08 /* Bits for R92S_SPS1_CTRL. 
*/ #define R92S_SPS1_LDEN 0x01 #define R92S_SPS1_SWEN 0x02 /* Bits for R92S_LDOA15_CTRL. */ #define R92S_LDA15_EN 0x01 /* Bits for R92S_LDOV12D_CTRL. */ #define R92S_LDV12_EN 0x01 /* Bits for R92C_EFUSE_CTRL. */ #define R92S_EFUSE_CTRL_DATA_M 0x000000ff #define R92S_EFUSE_CTRL_DATA_S 0 #define R92S_EFUSE_CTRL_ADDR_M 0x0003ff00 #define R92S_EFUSE_CTRL_ADDR_S 8 #define R92S_EFUSE_CTRL_VALID 0x80000000 /* Bits for R92S_CR. */ #define R92S_CR_TXDMA_EN 0x10 /* Bits for R92S_TCR. */ #define R92S_TCR_IMEM_CODE_DONE 0x01 #define R92S_TCR_IMEM_CHK_RPT 0x02 #define R92S_TCR_EMEM_CODE_DONE 0x04 #define R92S_TCR_EMEM_CHK_RPT 0x08 #define R92S_TCR_DMEM_CODE_DONE 0x10 #define R92S_TCR_IMEM_RDY 0x20 #define R92S_TCR_FWRDY 0x80 /* Bits for R92S_GPIO_IO_SEL. */ #define R92S_GPIO_WPS 0x10 /* Bits for R92S_MAC_PINMUX_CTRL. */ #define R92S_GPIOSEL_GPIO_M 0x03 #define R92S_GPIOSEL_GPIO_S 0 #define R92S_GPIOSEL_GPIO_JTAG 0 #define R92S_GPIOSEL_GPIO_PHYDBG 1 #define R92S_GPIOSEL_GPIO_BT 2 #define R92S_GPIOSEL_GPIO_WLANDBG 3 #define R92S_GPIOMUX_EN 0x08 /* Bits for R92S_IOCMD_CTRL. */ #define R92S_IOCMD_CLASS_M 0xff000000 #define R92S_IOCMD_CLASS_S 24 #define R92S_IOCMD_CLASS_BB_RF 0xf0 #define R92S_IOCMD_VALUE_M 0x00ffff00 #define R92S_IOCMD_VALUE_S 8 #define R92S_IOCMD_INDEX_M 0x000000ff #define R92S_IOCMD_INDEX_S 0 #define R92S_IOCMD_INDEX_BB_READ 0 #define R92S_IOCMD_INDEX_BB_WRITE 1 #define R92S_IOCMD_INDEX_RF_READ 2 #define R92S_IOCMD_INDEX_RF_WRITE 3 /* Bits for R92S_USB_HRPWM. */ #define R92S_USB_HRPWM_PS_ALL_ON 0x04 #define R92S_USB_HRPWM_PS_ST_ACTIVE 0x08 /* * Macros to access subfields in registers. */ /* Mask and Shift (getter). */ #define MS(val, field) \ (((val) & field##_M) >> field##_S) /* Shift and Mask (setter). */ #define SM(field, val) \ (((val) << field##_S) & field##_M) /* Rewrite. */ #define RW(var, field, val) \ (((var) & ~field##_M) | SM(field, val)) /* * ROM field with RF config. */ enum { RTL8712_RFCONFIG_1T = 0x10, RTL8712_RFCONFIG_2T = 0x20, RTL8712_RFCONFIG_1R = 0x01, RTL8712_RFCONFIG_2R = 0x02, RTL8712_RFCONFIG_1T1R = 0x11, RTL8712_RFCONFIG_1T2R = 0x12, RTL8712_RFCONFIG_TURBO = 0x92, RTL8712_RFCONFIG_2T2R = 0x22 }; /* * Firmware image header. 
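* The r92s_fw_hdr defined below embeds this r92s_fw_priv block; rsu_load_firmware() rewrites it in place (hci_sel, nendpoints, vcs settings, HT flags, etc.) and uploads it to the chip as the DMEM section.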
*/ struct r92s_fw_priv { /* QWORD0 */ uint16_t signature; uint8_t hci_sel; #define R92S_HCI_SEL_PCIE 0x01 #define R92S_HCI_SEL_USB 0x02 #define R92S_HCI_SEL_SDIO 0x04 #define R92S_HCI_SEL_8172 0x10 #define R92S_HCI_SEL_AP 0x80 uint8_t chip_version; uint16_t custid; uint8_t rf_config; //0x11: 1T1R, 0x12: 1T2R, 0x92: 1T2R turbo, 0x22: 2T2R uint8_t nendpoints; /* QWORD1 */ uint32_t regulatory; uint8_t rfintfs; uint8_t def_nettype; uint8_t turbo_mode; uint8_t lowpower_mode; /* QWORD2 */ uint8_t lbk_mode; uint8_t mp_mode; uint8_t vcs_type; #define R92S_VCS_TYPE_DISABLE 0 #define R92S_VCS_TYPE_ENABLE 1 #define R92S_VCS_TYPE_AUTO 2 uint8_t vcs_mode; #define R92S_VCS_MODE_NONE 0 #define R92S_VCS_MODE_RTS_CTS 1 #define R92S_VCS_MODE_CTS2SELF 2 uint32_t reserved1; /* QWORD3 */ uint8_t qos_en; uint8_t bw40_en; uint8_t amsdu2ampdu_en; uint8_t ampdu_en; uint8_t rc_offload; uint8_t agg_offload; uint16_t reserved2; /* QWORD4 */ uint8_t beacon_offload; uint8_t mlme_offload; uint8_t hwpc_offload; uint8_t tcpcsum_offload; uint8_t tcp_offload; uint8_t ps_offload; uint8_t wwlan_offload; uint8_t reserved3; /* QWORD5 */ uint16_t tcp_tx_len; uint16_t tcp_rx_len; uint32_t reserved4; } __packed; struct r92s_fw_hdr { uint16_t signature; uint16_t version; uint32_t dmemsz; uint32_t imemsz; uint32_t sramsz; uint32_t privsz; uint16_t efuse_addr; uint16_t h2c_resp_addr; uint32_t svnrev; uint8_t month; uint8_t day; uint8_t hour; uint8_t minute; struct r92s_fw_priv priv; } __packed; /* Structure for FW commands and FW events notifications. */ struct r92s_fw_cmd_hdr { uint16_t len; uint8_t code; uint8_t seq; #define R92S_FW_CMD_MORE 0x80 uint32_t reserved; } __packed; /* FW commands codes. */ #define R92S_CMD_READ_MACREG 0 #define R92S_CMD_WRITE_MACREG 1 #define R92S_CMD_READ_BBREG 2 #define R92S_CMD_WRITE_BBREG 3 #define R92S_CMD_READ_RFREG 4 #define R92S_CMD_WRITE_RFREG 5 #define R92S_CMD_READ_EEPROM 6 #define R92S_CMD_WRITE_EEPROM 7 #define R92S_CMD_READ_EFUSE 8 #define R92S_CMD_WRITE_EFUSE 9 #define R92S_CMD_READ_CAM 10 #define R92S_CMD_WRITE_CAM 11 #define R92S_CMD_SET_BCNITV 12 #define R92S_CMD_SET_MBIDCFG 13 #define R92S_CMD_JOIN_BSS 14 #define R92S_CMD_DISCONNECT 15 #define R92S_CMD_CREATE_BSS 16 #define R92S_CMD_SET_OPMODE 17 #define R92S_CMD_SITE_SURVEY 18 #define R92S_CMD_SET_AUTH 19 #define R92S_CMD_SET_KEY 20 #define R92S_CMD_SET_STA_KEY 21 #define R92S_CMD_SET_ASSOC_STA 22 #define R92S_CMD_DEL_ASSOC_STA 23 #define R92S_CMD_SET_STAPWRSTATE 24 #define R92S_CMD_SET_BASIC_RATE 25 #define R92S_CMD_GET_BASIC_RATE 26 #define R92S_CMD_SET_DATA_RATE 27 #define R92S_CMD_GET_DATA_RATE 28 #define R92S_CMD_SET_PHY_INFO 29 #define R92S_CMD_GET_PHY_INFO 30 #define R92S_CMD_SET_PHY 31 #define R92S_CMD_GET_PHY 32 #define R92S_CMD_READ_RSSI 33 #define R92S_CMD_READ_GAIN 34 #define R92S_CMD_SET_ATIM 35 #define R92S_CMD_SET_PWR_MODE 36 #define R92S_CMD_JOIN_BSS_RPT 37 #define R92S_CMD_SET_RA_TABLE 38 #define R92S_CMD_GET_RA_TABLE 39 #define R92S_CMD_GET_CCX_REPORT 40 #define R92S_CMD_GET_DTM_REPORT 41 #define R92S_CMD_GET_TXRATE_STATS 42 #define R92S_CMD_SET_USB_SUSPEND 43 #define R92S_CMD_SET_H2C_LBK 44 #define R92S_CMD_ADDBA_REQ 45 #define R92S_CMD_SET_CHANNEL 46 #define R92S_CMD_SET_TXPOWER 47 #define R92S_CMD_SWITCH_ANTENNA 48 #define R92S_CMD_SET_CRYSTAL_CAL 49 #define R92S_CMD_SET_SINGLE_CARRIER_TX 50 #define R92S_CMD_SET_SINGLE_TONE_TX 51 #define R92S_CMD_SET_CARRIER_SUPPR_TX 52 #define R92S_CMD_SET_CONTINUOUS_TX 53 #define R92S_CMD_SWITCH_BANDWIDTH 54 #define R92S_CMD_TX_BEACON 55 #define R92S_CMD_SET_POWER_TRACKING 56 
#define R92S_CMD_AMSDU_TO_AMPDU 57 #define R92S_CMD_SET_MAC_ADDRESS 58 #define R92S_CMD_GET_H2C_LBK 59 #define R92S_CMD_SET_PBREQ_IE 60 #define R92S_CMD_SET_ASSOCREQ_IE 61 #define R92S_CMD_SET_PBRESP_IE 62 #define R92S_CMD_SET_ASSOCRESP_IE 63 #define R92S_CMD_GET_CURDATARATE 64 #define R92S_CMD_GET_TXRETRY_CNT 65 #define R92S_CMD_GET_RXRETRY_CNT 66 #define R92S_CMD_GET_BCNOK_CNT 67 #define R92S_CMD_GET_BCNERR_CNT 68 #define R92S_CMD_GET_CURTXPWR_LEVEL 69 #define R92S_CMD_SET_DIG 70 #define R92S_CMD_SET_RA 71 #define R92S_CMD_SET_PT 72 #define R92S_CMD_READ_TSSI 73 /* FW events notifications codes. */ #define R92S_EVT_READ_MACREG 0 #define R92S_EVT_READ_BBREG 1 #define R92S_EVT_READ_RFREG 2 #define R92S_EVT_READ_EEPROM 3 #define R92S_EVT_READ_EFUSE 4 #define R92S_EVT_READ_CAM 5 #define R92S_EVT_GET_BASICRATE 6 #define R92S_EVT_GET_DATARATE 7 #define R92S_EVT_SURVEY 8 #define R92S_EVT_SURVEY_DONE 9 #define R92S_EVT_JOIN_BSS 10 #define R92S_EVT_ADD_STA 11 #define R92S_EVT_DEL_STA 12 #define R92S_EVT_ATIM_DONE 13 #define R92S_EVT_TX_REPORT 14 #define R92S_EVT_CCX_REPORT 15 #define R92S_EVT_DTM_REPORT 16 #define R92S_EVT_TXRATE_STATS 17 #define R92S_EVT_C2H_LBK 18 #define R92S_EVT_FWDBG 19 #define R92S_EVT_C2H_FEEDBACK 20 #define R92S_EVT_ADDBA 21 #define R92S_EVT_C2H_BCN 22 #define R92S_EVT_PWR_STATE 23 #define R92S_EVT_WPS_PBC 24 #define R92S_EVT_ADDBA_REQ_REPORT 25 /* Structure for R92S_CMD_SITE_SURVEY. */ struct r92s_fw_cmd_sitesurvey { uint32_t active; uint32_t limit; uint32_t ssidlen; uint8_t ssid[32 + 1]; } __packed; /* Structure for R92S_CMD_SET_AUTH. */ struct r92s_fw_cmd_auth { uint8_t mode; #define R92S_AUTHMODE_OPEN 0 #define R92S_AUTHMODE_SHARED 1 #define R92S_AUTHMODE_WPA 2 uint8_t dot1x; } __packed; /* Structure for R92S_CMD_SET_KEY. */ struct r92s_fw_cmd_set_key { uint8_t algo; #define R92S_KEY_ALGO_NONE 0 #define R92S_KEY_ALGO_WEP40 1 #define R92S_KEY_ALGO_TKIP 2 #define R92S_KEY_ALGO_TKIP_MMIC 3 #define R92S_KEY_ALGO_AES 4 #define R92S_KEY_ALGO_WEP104 5 uint8_t id; uint8_t grpkey; uint8_t key[16]; } __packed; /* Structures for R92S_EVENT_SURVEY/R92S_CMD_JOIN_BSS. */ /* NDIS_802_11_SSID. */ struct ndis_802_11_ssid { uint32_t ssidlen; uint8_t ssid[32]; } __packed; /* NDIS_802_11_CONFIGURATION_FH. */ struct ndis_802_11_configuration_fh { uint32_t len; uint32_t hoppattern; uint32_t hopset; uint32_t dwelltime; } __packed; /* NDIS_802_11_CONFIGURATION. */ struct ndis_802_11_configuration { uint32_t len; uint32_t bintval; uint32_t atim; uint32_t dsconfig; struct ndis_802_11_configuration_fh fhconfig; } __packed; /* NDIS_WLAN_BSSID_EX. */ struct ndis_wlan_bssid_ex { uint32_t len; uint8_t macaddr[IEEE80211_ADDR_LEN]; uint8_t reserved[2]; struct ndis_802_11_ssid ssid; uint32_t privacy; int32_t rssi; uint32_t networktype; #define NDIS802_11FH 0 #define NDIS802_11DS 1 #define NDIS802_11OFDM5 2 #define NDIS802_11OFDM24 3 #define NDIS802_11AUTOMODE 4 struct ndis_802_11_configuration config; uint32_t inframode; #define NDIS802_11IBSS 0 #define NDIS802_11INFRASTRUCTURE 1 #define NDIS802_11AUTOUNKNOWN 2 #define NDIS802_11MONITOR 3 #define NDIS802_11APMODE 4 uint8_t supprates[16]; uint32_t ieslen; /* Followed by ``ieslen'' bytes. */ } __packed; /* NDIS_802_11_FIXED_IEs. */ struct ndis_802_11_fixed_ies { uint8_t tstamp[8]; uint16_t bintval; uint16_t capabilities; } __packed; /* Structure for R92S_CMD_SET_PWR_MODE. 
*/ struct r92s_set_pwr_mode { uint8_t mode; #define R92S_PS_MODE_ACTIVE 0 #define R92S_PS_MODE_MIN 1 #define R92S_PS_MODE_MAX 2 #define R92S_PS_MODE_DTIM 3 #define R92S_PS_MODE_VOIP 4 #define R92S_PS_MODE_UAPSD_WMM 5 #define R92S_PS_MODE_UAPSD 6 #define R92S_PS_MODE_IBSS 7 #define R92S_PS_MODE_WWLAN 8 #define R92S_PS_MODE_RADIOOFF 9 #define R92S_PS_MODE_DISABLE 10 uint8_t low_traffic_en; uint8_t lpnav_en; uint8_t rf_low_snr_en; uint8_t dps_en; uint8_t bcn_rx_en; uint8_t bcn_pass_cnt; uint8_t bcn_to; uint16_t bcn_itv; uint8_t app_itv; uint8_t awake_bcn_itv; uint8_t smart_ps; uint8_t bcn_pass_time; } __packed; /* Structure for event R92S_EVENT_JOIN_BSS. */ struct r92s_event_join_bss { uint32_t next; uint32_t prev; uint32_t networktype; uint32_t fixed; uint32_t lastscanned; uint32_t associd; uint32_t join_res; struct ndis_wlan_bssid_ex bss; } __packed; #define R92S_MACID_BSS 5 /* Rx MAC descriptor. */ struct r92s_rx_stat { uint32_t rxdw0; #define R92S_RXDW0_PKTLEN_M 0x00003fff #define R92S_RXDW0_PKTLEN_S 0 #define R92S_RXDW0_CRCERR 0x00004000 #define R92S_RXDW0_INFOSZ_M 0x000f0000 #define R92S_RXDW0_INFOSZ_S 16 #define R92S_RXDW0_QOS 0x00800000 #define R92S_RXDW0_SHIFT_M 0x03000000 #define R92S_RXDW0_SHIFT_S 24 #define R92S_RXDW0_DECRYPTED 0x08000000 uint32_t rxdw1; #define R92S_RXDW1_MOREFRAG 0x08000000 uint32_t rxdw2; #define R92S_RXDW2_FRAG_M 0x0000f000 #define R92S_RXDW2_FRAG_S 12 #define R92S_RXDW2_PKTCNT_M 0x00ff0000 #define R92S_RXDW2_PKTCNT_S 16 uint32_t rxdw3; #define R92S_RXDW3_RATE_M 0x0000003f #define R92S_RXDW3_RATE_S 0 #define R92S_RXDW3_TCPCHKRPT 0x00000800 #define R92S_RXDW3_IPCHKRPT 0x00001000 #define R92S_RXDW3_TCPCHKVALID 0x00002000 #define R92S_RXDW3_HTC 0x00004000 uint32_t rxdw4; uint32_t rxdw5; } __packed __aligned(4); /* Rx PHY descriptor. */ struct r92s_rx_phystat { uint32_t phydw0; uint32_t phydw1; uint32_t phydw2; uint32_t phydw3; uint32_t phydw4; uint32_t phydw5; uint32_t phydw6; uint32_t phydw7; } __packed __aligned(4); /* Rx PHY CCK descriptor. */ struct r92s_rx_cck { uint8_t adc_pwdb[4]; uint8_t sq_rpt; uint8_t agc_rpt; } __packed; /* Tx MAC descriptor. 
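* (rsu_fw_loadsection() prepends one of these descriptors to every firmware chunk it queues, setting R92S_TXDW0_PKTLEN and, on the last chunk, R92S_TXDW0_LINIP.)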
*/ struct r92s_tx_desc { uint32_t txdw0; #define R92S_TXDW0_PKTLEN_M 0x0000ffff #define R92S_TXDW0_PKTLEN_S 0 #define R92S_TXDW0_OFFSET_M 0x00ff0000 #define R92S_TXDW0_OFFSET_S 16 #define R92S_TXDW0_TYPE_M 0x03000000 #define R92S_TXDW0_TYPE_S 24 #define R92S_TXDW0_LSG 0x04000000 #define R92S_TXDW0_FSG 0x08000000 #define R92S_TXDW0_LINIP 0x10000000 #define R92S_TXDW0_OWN 0x80000000 uint32_t txdw1; #define R92S_TXDW1_MACID_M 0x0000001f #define R92S_TXDW1_MACID_S 0 #define R92S_TXDW1_MOREDATA 0x00000020 #define R92S_TXDW1_MOREFRAG 0x00000040 #define R92S_TXDW1_QSEL_M 0x00001f00 #define R92S_TXDW1_QSEL_S 8 #define R92S_TXDW1_QSEL_BE 0x03 #define R92S_TXDW1_QSEL_H2C 0x13 #define R92S_TXDW1_NONQOS 0x00010000 #define R92S_TXDW1_KEYIDX_M 0x00060000 #define R92S_TXDW1_KEYIDX_S 17 #define R92S_TXDW1_CIPHER_M 0x00c00000 #define R92S_TXDW1_CIPHER_S 22 #define R92S_TXDW1_CIPHER_WEP 1 #define R92S_TXDW1_CIPHER_TKIP 2 #define R92S_TXDW1_CIPHER_AES 3 #define R92S_TXDW1_HWPC 0x80000000 uint32_t txdw2; #define R92S_TXDW2_BMCAST 0x00000080 #define R92S_TXDW2_AGGEN 0x20000000 #define R92S_TXDW2_BK 0x40000000 uint32_t txdw3; #define R92S_TXDW3_SEQ_M 0x0fff0000 #define R92S_TXDW3_SEQ_S 16 #define R92S_TXDW3_FRAG_M 0xf0000000 #define R92S_TXDW3_FRAG_S 28 uint32_t txdw4; #define R92S_TXDW4_TXBW 0x00040000 uint32_t txdw5; #define R92S_TXDW5_DISFB 0x00008000 uint16_t ipchksum; uint16_t tcpchksum; uint16_t txbufsize; uint16_t reserved1; } __packed __aligned(4); struct r92s_add_ba_event { uint8_t mac_addr[IEEE80211_ADDR_LEN]; uint16_t ssn; uint8_t tid; }; struct r92s_add_ba_req { uint32_t tid; }; /* * Driver definitions. */ #define RSU_RX_LIST_COUNT 100 #define RSU_TX_LIST_COUNT 32 #define RSU_HOST_CMD_RING_COUNT 32 #define RSU_RXBUFSZ (8 * 1024) #define RSU_TXBUFSZ \ ((sizeof(struct r92s_tx_desc) + IEEE80211_MAX_LEN + 3) & ~3) #define RSU_TX_TIMEOUT 5000 /* ms */ #define RSU_CMD_TIMEOUT 2000 /* ms */ /* Queue ids (used by soft only). */ #define RSU_QID_BCN 0 #define RSU_QID_MGT 1 #define RSU_QID_BMC 2 #define RSU_QID_VO 3 #define RSU_QID_VI 4 #define RSU_QID_BE 5 #define RSU_QID_BK 6 #define RSU_QID_RXOFF 7 #define RSU_QID_H2C 8 #define RSU_QID_C2H 9 /* Map AC to queue id. */ static const uint8_t rsu_ac2qid[WME_NUM_AC] = { RSU_QID_BE, RSU_QID_BK, RSU_QID_VI, RSU_QID_VO }; /* Pipe index to endpoint address mapping. */ static const uint8_t r92s_epaddr[] = { 0x83, 0x04, 0x06, 0x0d, 0x05, 0x07, 0x89, 0x0a, 0x0b, 0x0c }; /* Queue id to pipe index mapping for 4 endpoints configurations. */ static const uint8_t rsu_qid2idx_4ep[] = { 3, 3, 3, 1, 1, 2, 2, 0, 3, 0 }; /* Queue id to pipe index mapping for 6 endpoints configurations. */ static const uint8_t rsu_qid2idx_6ep[] = { 3, 3, 3, 1, 4, 2, 5, 0, 3, 0 }; /* Queue id to pipe index mapping for 11 endpoints configurations. 
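* A worked example using the tables above (illustrative, not upstream text): in the 4-endpoint configuration, RSU_QID_VO (3) maps through rsu_qid2idx_4ep[3] == 1 to endpoint address r92s_epaddr[1] == 0x04.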
*/ static const uint8_t rsu_qid2idx_11ep[] = { 7, 9, 8, 1, 4, 2, 5, 0, 3, 6 }; struct rsu_rx_radiotap_header { struct ieee80211_radiotap_header wr_ihdr; uint8_t wr_flags; uint8_t wr_rate; uint16_t wr_chan_freq; uint16_t wr_chan_flags; uint8_t wr_dbm_antsignal; } __packed __aligned(8); #define RSU_RX_RADIOTAP_PRESENT \ (1 << IEEE80211_RADIOTAP_FLAGS | \ 1 << IEEE80211_RADIOTAP_RATE | \ 1 << IEEE80211_RADIOTAP_CHANNEL | \ 1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) struct rsu_tx_radiotap_header { struct ieee80211_radiotap_header wt_ihdr; uint8_t wt_flags; uint16_t wt_chan_freq; uint16_t wt_chan_flags; } __packed __aligned(8); #define RSU_TX_RADIOTAP_PRESENT \ (1 << IEEE80211_RADIOTAP_FLAGS | \ 1 << IEEE80211_RADIOTAP_CHANNEL) struct rsu_softc; struct rsu_host_cmd { void (*cb)(struct rsu_softc *, void *); uint8_t data[256]; }; struct rsu_cmd_newstate { enum ieee80211_state state; int arg; }; struct rsu_cmd_key { struct ieee80211_key key; }; struct rsu_host_cmd_ring { struct rsu_host_cmd cmd[RSU_HOST_CMD_RING_COUNT]; int cur; int next; int queued; }; enum { RSU_BULK_RX, RSU_BULK_TX_BE_BK, /* = WME_AC_BE/BK */ RSU_BULK_TX_VI_VO, /* = WME_AC_VI/VO */ RSU_BULK_TX_H2C, /* H2C */ RSU_N_TRANSFER, }; struct rsu_data { struct rsu_softc *sc; uint8_t *buf; uint16_t buflen; struct mbuf *m; struct ieee80211_node *ni; STAILQ_ENTRY(rsu_data) next; }; struct rsu_vap { struct ieee80211vap vap; int (*newstate)(struct ieee80211vap *, enum ieee80211_state, int); }; #define RSU_VAP(vap) ((struct rsu_vap *)(vap)) #define RSU_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define RSU_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define RSU_ASSERT_LOCKED(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) struct rsu_softc { struct ieee80211com sc_ic; struct mbufq sc_snd; device_t sc_dev; struct usb_device *sc_udev; int (*sc_newstate)(struct ieee80211com *, enum ieee80211_state, int); struct usbd_interface *sc_iface; struct timeout_task calib_task; struct task tx_task; const uint8_t *qid2idx; struct mtx sc_mtx; int sc_ht; int sc_nendpoints; int sc_curpwrstate; int sc_currssi; u_int sc_running:1, sc_calibrating:1, sc_active_scan:1, sc_extra_scan:1; u_int cut; uint8_t sc_rftype; int8_t sc_nrxstream; int8_t sc_ntxstream; struct rsu_host_cmd_ring cmdq; struct rsu_data sc_rx[RSU_RX_LIST_COUNT]; struct rsu_data sc_tx[RSU_TX_LIST_COUNT]; struct rsu_data *fwcmd_data; uint8_t cmd_seq; uint8_t rom[128]; struct usb_xfer *sc_xfer[RSU_N_TRANSFER]; STAILQ_HEAD(, rsu_data) sc_rx_active; STAILQ_HEAD(, rsu_data) sc_rx_inactive; STAILQ_HEAD(, rsu_data) sc_tx_active[RSU_N_TRANSFER]; STAILQ_HEAD(, rsu_data) sc_tx_inactive; STAILQ_HEAD(, rsu_data) sc_tx_pending[RSU_N_TRANSFER]; union { struct rsu_rx_radiotap_header th; uint8_t pad[64]; } sc_rxtapu; #define sc_rxtap sc_rxtapu.th union { struct rsu_tx_radiotap_header th; uint8_t pad[64]; } sc_txtapu; #define sc_txtap sc_txtapu.th }; Index: projects/clang391-import/sys/netinet/ip_fastfwd.c =================================================================== --- projects/clang391-import/sys/netinet/ip_fastfwd.c (revision 309262) +++ projects/clang391-import/sys/netinet/ip_fastfwd.c (revision 309263) @@ -1,479 +1,434 @@ /*- * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * ip_fastforward gets its speed from processing the forwarded packet to * completion (if_output on the other side) without any queues or netisr's. * The receiving interface DMAs the packet into memory, the upper half of the * driver calls ip_fastforward, we do our routing table lookup and directly * send it off to the outgoing interface, which DMAs the packet to the * network card. The only part of the packet we touch with the CPU is the * IP header (unless there are complex firewall rules touching other parts * of the packet, but that is up to you). We are essentially limited by bus * bandwidth and how fast the network card/driver can set up receives and * transmits. * * We handle basic errors, IP header errors, checksum errors, * destination unreachable, fragmentation and fragmentation needed and * report them via ICMP to the sender. * * Otherwise, if something is not pure IPv4 unicast forwarding, we fall back to * the normal ip_input processing path. We should only be called from * interfaces connected to the outside world. * * Firewalling is fully supported, including divert, ipfw fwd, and ipfilter * ipnat address rewrite. * * IPSEC is not supported if this host is a tunnel broker. IPSEC is * supported for connections to/from local host. * * We try to do the least expensive (in CPU ops) checks and operations * first to catch junk with as little overhead as possible. * * We take full advantage of hardware support for IP checksum and * fragmentation offloading. * * We don't do ICMP redirects in the fast forwarding path. I have had my own * cases where two core routers with the Zebra routing suite would send millions * of ICMP redirects to connected hosts if the destination router was not the * default gateway. In one case it was filling the routing table of a host * with approximately 300,000 cloned redirect entries until it ran out of * kernel memory. However, the networking code proved very robust and it didn't * crash or fail in other ways. */ /* * Many thanks to Matt Thomas of NetBSD for the basic structure of ip_flow.c, * which is being followed here.
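* * For orientation (a summary of the code below, not upstream text): ip_tryforward() validates the header, runs the inbound pfil hooks, decrements the TTL, resolves the next hop via fib4_lookup_nh_basic(), runs the outbound pfil hooks, and hands the packet to the interface's if_output; whenever a check decides the packet is not plain unicast forwarding, the mbuf is returned so ip_input() can process it in full.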
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ipstealth.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include -static struct sockaddr_in * -ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) +static int +ip_findroute(struct nhop4_basic *pnh, struct in_addr dest, struct mbuf *m) { - struct sockaddr_in *dst; - struct rtentry *rt; + bzero(pnh, sizeof(*pnh)); + if (fib4_lookup_nh_basic(M_GETFIB(m), dest, 0, 0, pnh) != 0) { + IPSTAT_INC(ips_noroute); + IPSTAT_INC(ips_cantforward); + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); + return (EHOSTUNREACH); + } /* - * Find route to destination. + * Drop blackholed traffic and directed broadcasts. */ - bzero(ro, sizeof(*ro)); - dst = (struct sockaddr_in *)&ro->ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr.s_addr = dest.s_addr; - in_rtalloc_ign(ro, 0, M_GETFIB(m)); + if ((pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST)) != 0) { + IPSTAT_INC(ips_cantforward); + m_freem(m); + return (EHOSTUNREACH); + } - /* - * Route there and interface still up? - */ - rt = ro->ro_rt; - if (rt && (rt->rt_flags & RTF_UP) && - (rt->rt_ifp->if_flags & IFF_UP) && - (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if (rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in *)rt->rt_gateway; - } else { - IPSTAT_INC(ips_noroute); + if (pnh->nh_flags & NHF_REJECT) { IPSTAT_INC(ips_cantforward); - if (rt) - RTFREE(rt); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - return NULL; + return (EHOSTUNREACH); } - return dst; + + return (0); } /* * Try to forward a packet based on the destination address. * This is a fast path optimized for the plain forwarding case. * If the packet is handled (and consumed) here then we return NULL; * otherwise mbuf is returned and the packet should be delivered * to ip_input for full processing. */ struct mbuf * ip_tryforward(struct mbuf *m) { struct ip *ip; struct mbuf *m0 = NULL; - struct route ro; - struct sockaddr_in *dst = NULL; - struct ifnet *ifp; + struct nhop4_basic nh; + struct sockaddr_in dst; struct in_addr odest, dest; uint16_t ip_len, ip_off; int error = 0; - int mtu; struct m_tag *fwd_tag = NULL; /* * Are we active and forwarding packets? */ M_ASSERTVALID(m); M_ASSERTPKTHDR(m); - bzero(&ro, sizeof(ro)); - - #ifdef ALTQ /* * Is packet dropped by traffic conditioner? */ if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) goto drop; #endif /* * Only IP packets without options */ ip = mtod(m, struct ip *); if (ip->ip_hl != (sizeof(struct ip) >> 2)) { if (V_ip_doopts == 1) return m; else if (V_ip_doopts == 2) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB, 0, 0); return NULL; /* mbuf already free'd */ } /* else ignore IP options and continue */ } /* * Only unicast IP, not from loopback, no L2 or IP broadcast, * no multicast, no INADDR_ANY * * XXX: Probably some of these checks could be direct drop * conditions. However it is not clear whether there are some * hacks or obscure behaviours which make it necessary to * let ip_input handle it. We play safe here and let ip_input * deal with it until it is proven that we can directly drop it. 
*/ if ((m->m_flags & (M_BCAST|M_MCAST)) || (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST || ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || ip->ip_src.s_addr == INADDR_ANY || ip->ip_dst.s_addr == INADDR_ANY ) return m; /* * Is it for a local address on this host? */ if (in_localip(ip->ip_dst)) return m; IPSTAT_INC(ips_total); /* * Step 3: incoming packet firewall processing */ odest.s_addr = dest.s_addr = ip->ip_dst.s_addr; /* * Run through list of ipfilter hooks for input packets */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passin; if (pfil_run_hooks( &V_inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) || m == NULL) goto drop; M_ASSERTVALID(m); M_ASSERTPKTHDR(m); ip = mtod(m, struct ip *); /* m may have changed by pfil hook */ dest.s_addr = ip->ip_dst.s_addr; /* * Destination address changed? */ if (odest.s_addr != dest.s_addr) { /* * Is it now for a local address on this host? */ if (in_localip(dest)) goto forwardlocal; /* * Go on with new destination address */ } if (m->m_flags & M_FASTFWD_OURS) { /* * ipfw changed it for a local address on this host. */ goto forwardlocal; } passin: /* * Step 4: decrement TTL and look up route */ /* * Check TTL */ #ifdef IPSTEALTH if (!V_ipstealth) { #endif if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); return NULL; /* mbuf already free'd */ } /* * Decrement the TTL and incrementally change the IP header checksum. * Don't bother doing this with hw checksum offloading, it's faster * doing it right here. */ ip->ip_ttl -= IPTTLDEC; if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8)) ip->ip_sum -= ~htons(IPTTLDEC << 8); else ip->ip_sum += htons(IPTTLDEC << 8); #ifdef IPSTEALTH } #endif /* * Find route to destination. */ - if ((dst = ip_findroute(&ro, dest, m)) == NULL) - return NULL; /* icmp unreach already sent */ - ifp = ro.ro_rt->rt_ifp; + if (ip_findroute(&nh, dest, m) != 0) + return (NULL); /* icmp unreach already sent */ /* - * Immediately drop blackholed traffic, and directed broadcasts - * for either the all-ones or all-zero subnet addresses on - * locally attached networks. - */ - if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0) - goto drop; - - /* * Step 5: outgoing firewall packet processing */ - - /* - * Run through list of hooks for output packets. - */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passout; - if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, NULL) || m == NULL) { + if (pfil_run_hooks(&V_inet_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, NULL) || + m == NULL) { goto drop; } M_ASSERTVALID(m); M_ASSERTPKTHDR(m); ip = mtod(m, struct ip *); dest.s_addr = ip->ip_dst.s_addr; /* * Destination address changed? */ if (m->m_flags & M_IP_NEXTHOP) fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); if (odest.s_addr != dest.s_addr || fwd_tag != NULL) { /* * Is it now for a local address on this host? */ if (m->m_flags & M_FASTFWD_OURS || in_localip(dest)) { forwardlocal: /* * Return packet for processing by ip_input(). 
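* (Setting M_FASTFWD_OURS tells ip_input() that the destination has already been checked and is now a local address, so the packet is delivered locally instead of re-entering the forwarding path.)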
*/ m->m_flags |= M_FASTFWD_OURS; - if (ro.ro_rt) - RTFREE(ro.ro_rt); - return m; + return (m); } /* * Redo route lookup with new destination address */ if (fwd_tag) { dest.s_addr = ((struct sockaddr_in *) (fwd_tag + 1))->sin_addr.s_addr; m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP_NEXTHOP; } - RTFREE(ro.ro_rt); - if ((dst = ip_findroute(&ro, dest, m)) == NULL) - return NULL; /* icmp unreach already sent */ - ifp = ro.ro_rt->rt_ifp; + if (ip_findroute(&nh, dest, m) != 0) + return (NULL); /* icmp unreach already sent */ } passout: /* * Step 6: send off the packet */ ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); - /* - * Check if route is dampned (when ARP is unable to resolve) - */ - if ((ro.ro_rt->rt_flags & RTF_REJECT) && - (ro.ro_rt->rt_expire == 0 || time_uptime < ro.ro_rt->rt_expire)) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - goto consumed; - } + bzero(&dst, sizeof(dst)); + dst.sin_family = AF_INET; + dst.sin_len = sizeof(dst); + dst.sin_addr = nh.nh_addr; /* - * Check if media link state of interface is not down - */ - if (ifp->if_link_state == LINK_STATE_DOWN) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - goto consumed; - } - - /* * Check if packet fits MTU or if hardware will fragment for us */ - if (ro.ro_rt->rt_mtu) - mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); - else - mtu = ifp->if_mtu; - - if (ip_len <= mtu) { + if (ip_len <= nh.nh_mtu) { /* * Avoid confusing lower layers. */ m_clrprotoflags(m); /* * Send off the packet via outgoing interface */ - IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, &ro); + IP_PROBE(send, NULL, NULL, ip, nh.nh_ifp, ip, NULL); + error = (*nh.nh_ifp->if_output)(nh.nh_ifp, m, + (struct sockaddr *)&dst, NULL); } else { /* * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery */ if (ip_off & IP_DF) { IPSTAT_INC(ips_cantfrag); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, - 0, mtu); + 0, nh.nh_mtu); goto consumed; } else { /* * We have to fragment the packet */ m->m_pkthdr.csum_flags |= CSUM_IP; - if (ip_fragment(ip, &m, mtu, ifp->if_hwassist)) + if (ip_fragment(ip, &m, nh.nh_mtu, + nh.nh_ifp->if_hwassist) != 0) goto drop; KASSERT(m != NULL, ("null mbuf and no error")); /* * Send off the fragments via outgoing interface */ error = 0; do { m0 = m->m_nextpkt; m->m_nextpkt = NULL; /* * Avoid confusing lower layers. 
*/ m_clrprotoflags(m); - IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, &ro); + IP_PROBE(send, NULL, NULL, ip, nh.nh_ifp, + ip, NULL); + /* XXX: we can use cached route here */ + error = (*nh.nh_ifp->if_output)(nh.nh_ifp, m, + (struct sockaddr *)&dst, NULL); if (error) break; } while ((m = m0) != NULL); if (error) { /* Reclaim remaining fragments */ for (m = m0; m; m = m0) { m0 = m->m_nextpkt; m_freem(m); } } else IPSTAT_INC(ips_fragmented); } } if (error != 0) IPSTAT_INC(ips_odropped); else { - counter_u64_add(ro.ro_rt->rt_pksent, 1); IPSTAT_INC(ips_forward); IPSTAT_INC(ips_fastforward); } consumed: - RTFREE(ro.ro_rt); return NULL; drop: if (m) m_freem(m); - if (ro.ro_rt) - RTFREE(ro.ro_rt); return NULL; } Index: projects/clang391-import/usr.bin/clang/clang.prog.mk =================================================================== --- projects/clang391-import/usr.bin/clang/clang.prog.mk (revision 309262) +++ projects/clang391-import/usr.bin/clang/clang.prog.mk (revision 309263) @@ -1,21 +1,23 @@ # $FreeBSD$ .include "${SRCTOP}/lib/clang/clang.pre.mk" CFLAGS+= -I${OBJTOP}/lib/clang/libclang CFLAGS+= -I${OBJTOP}/lib/clang/libllvm .include "${SRCTOP}/lib/clang/clang.build.mk" LIBDEPS+= clang LIBDEPS+= llvm .for lib in ${LIBDEPS} DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a .endfor +PACKAGE= clang + LIBADD+= ncursesw LIBADD+= pthread .include Index: projects/clang391-import/usr.bin/clang/lld/Makefile =================================================================== --- projects/clang391-import/usr.bin/clang/lld/Makefile (revision 309262) +++ projects/clang391-import/usr.bin/clang/lld/Makefile (revision 309263) @@ -1,74 +1,75 @@ # $FreeBSD$ .include LLVM_SRCS= ${SRCTOP}/contrib/llvm LLD_SRCS= ${LLVM_SRCS}/tools/lld +PACKAGE= lld PROG_CXX= ld.lld MAN= .if ${MK_LLD_AS_LD} != "no" SYMLINKS= ${PROG_CXX} ${BINDIR}/ld .endif CFLAGS+= -I${LLD_SRCS}/include CFLAGS+= -I${.OBJDIR} CFLAGS+= -I${.OBJDIR}/../../../lib/clang/libllvm SRCDIR= tools/lld SRCS+= ELF/Driver.cpp SRCS+= ELF/DriverUtils.cpp SRCS+= ELF/EhFrame.cpp SRCS+= ELF/Error.cpp SRCS+= ELF/ICF.cpp SRCS+= ELF/InputFiles.cpp SRCS+= ELF/InputSection.cpp SRCS+= ELF/LinkerScript.cpp SRCS+= ELF/LTO.cpp SRCS+= ELF/MarkLive.cpp SRCS+= ELF/OutputSections.cpp SRCS+= ELF/Relocations.cpp SRCS+= ELF/ScriptParser.cpp SRCS+= ELF/Strings.cpp SRCS+= ELF/SymbolListFile.cpp SRCS+= ELF/SymbolTable.cpp SRCS+= ELF/Symbols.cpp SRCS+= ELF/Target.cpp SRCS+= ELF/Thunks.cpp SRCS+= ELF/Writer.cpp SRCS+= lib/Config/Version.cpp SRCS+= lib/Core/DefinedAtom.cpp SRCS+= lib/Core/Error.cpp SRCS+= lib/Core/File.cpp SRCS+= lib/Core/LinkingContext.cpp SRCS+= lib/Core/Reader.cpp SRCS+= lib/Core/Resolver.cpp SRCS+= lib/Core/SymbolTable.cpp SRCS+= lib/Core/Writer.cpp SRCS+= tools/lld/lld.cpp .include "${SRCTOP}/lib/clang/llvm.build.mk" LIBDEPS+= llvm .for lib in ${LIBDEPS} DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a .endfor LLVM_TBLGEN?= llvm-tblgen ELF/Options.inc: ${LLD_SRCS}/ELF/Options.td ${LLVM_TBLGEN} -gen-opt-parser-defs \ -I ${LLVM_SRCS}/include \ -d ${.TARGET:C/$/.d/} -o ${.TARGET} \ ${LLVM_SRCS}/tools/lld/ELF/Options.td TGHDRS+= ELF/Options.inc DPSRCS+= ${TGHDRS} CLEANFILES+= ${TGHDRS} ${TGHDRS:C/$/.d/} LIBADD+= ncursesw LIBADD+= pthread LIBADD+= z .include Index: projects/clang391-import/usr.bin/clang/lldb/Makefile =================================================================== --- 
projects/clang391-import/usr.bin/clang/lldb/Makefile (revision 309262) +++ projects/clang391-import/usr.bin/clang/lldb/Makefile (revision 309263) @@ -1,31 +1,32 @@ # $FreeBSD$ .include "${SRCTOP}/lib/clang/lldb.pre.mk" +PACKAGE= lldb PROG_CXX= lldb # Man page directory .PATH: ${LLDB_SRCS}/docs CFLAGS+= -I${LLDB_SRCS}/include SRCDIR= tools/lldb/tools/driver SRCS+= Driver.cpp .include "${SRCTOP}/lib/clang/clang.build.mk" LIBDEPS+= lldb LIBDEPS+= clang LIBDEPS+= llvm .for lib in ${LIBDEPS} DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a .endfor LIBADD+= edit LIBADD+= panel LIBADD+= ncursesw LIBADD+= pthread LIBADD+= z .include Index: projects/clang391-import/usr.bin/clang/llvm.prog.mk =================================================================== --- projects/clang391-import/usr.bin/clang/llvm.prog.mk (revision 309262) +++ projects/clang391-import/usr.bin/clang/llvm.prog.mk (revision 309263) @@ -1,25 +1,27 @@ # $FreeBSD$ .include "${SRCTOP}/lib/clang/llvm.pre.mk" CFLAGS+= -I${OBJTOP}/lib/clang/libllvm .include "${SRCTOP}/lib/clang/llvm.build.mk" # Special case for the bootstrap-tools phase. .if (defined(TOOLS_PREFIX) || ${MACHINE} == "host") && \ (${PROG_CXX} == "clang-tblgen" || ${PROG_CXX} == "llvm-tblgen") LIBDEPS+= llvmminimal .else LIBDEPS+= llvm .endif .for lib in ${LIBDEPS} DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a .endfor +PACKAGE= clang + LIBADD+= ncursesw LIBADD+= pthread .include Index: projects/clang391-import/usr.bin/indent/indent.c =================================================================== --- projects/clang391-import/usr.bin/indent/indent.c (revision 309262) +++ projects/clang391-import/usr.bin/indent/indent.c (revision 309263) @@ -1,1265 +1,1275 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1976 Board of Trustees of the University of Illinois. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\ @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\ @(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void bakcopy(void); static void indent_declaration(int, int); const char *in_name = "Standard Input"; /* will always point to name of input * file */ const char *out_name = "Standard Output"; /* will always point to name * of output file */ char bakfile[MAXPATHLEN] = ""; int main(int argc, char **argv) { cap_rights_t rights; int dec_ind; /* current indentation for declarations */ int di_stack[20]; /* a stack of structure indentation levels */ int flushed_nl; /* used when buffering up comments to remember * that a newline was passed over */ int force_nl; /* when true, code must be broken */ int hd_type = 0; /* used to store type of stmt for if (...), * for (...), etc */ int i; /* local loop counter */ int scase; /* set to true when we see a case, so we will * know what to do with the following colon */ int sp_sw; /* when true, we are in the expression of * if(...), while(...), etc. */ int squest; /* when this is positive, we have seen a ? 
* without the matching : in a ?: * construct */ const char *t_ptr; /* used for copying tokens */ int tabs_to_var; /* true if using tabs to indent to var name */ int type_code; /* the type of token, returned by lexi */ int last_else = 0; /* true iff last keyword was an else */ /*-----------------------------------------------*\ | INITIALIZATION | \*-----------------------------------------------*/ found_err = 0; ps.p_stack[0] = stmt; /* this is the parser's stack */ ps.last_nl = true; /* this is true if the last thing scanned was * a newline */ ps.last_token = semicolon; combuf = (char *) malloc(bufsize); if (combuf == NULL) err(1, NULL); labbuf = (char *) malloc(bufsize); if (labbuf == NULL) err(1, NULL); codebuf = (char *) malloc(bufsize); if (codebuf == NULL) err(1, NULL); tokenbuf = (char *) malloc(bufsize); if (tokenbuf == NULL) err(1, NULL); alloc_typenames(); l_com = combuf + bufsize - 5; l_lab = labbuf + bufsize - 5; l_code = codebuf + bufsize - 5; l_token = tokenbuf + bufsize - 5; combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and * comment buffers */ combuf[1] = codebuf[1] = labbuf[1] = '\0'; ps.else_if = 1; /* Default else-if special processing to on */ s_lab = e_lab = labbuf + 1; s_code = e_code = codebuf + 1; s_com = e_com = combuf + 1; s_token = e_token = tokenbuf + 1; in_buffer = (char *) malloc(10); if (in_buffer == NULL) err(1, NULL); in_buffer_limit = in_buffer + 8; buf_ptr = buf_end = in_buffer; line_no = 1; had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; sp_sw = force_nl = false; ps.in_or_st = false; ps.bl_line = true; dec_ind = 0; di_stack[ps.dec_nest = 0] = 0; ps.want_blank = ps.in_stmt = ps.ind_stmt = false; scase = ps.pcase = false; squest = 0; sc_end = NULL; bp_save = NULL; be_save = NULL; output = NULL; tabs_to_var = 0; /*--------------------------------------------------*\ | COMMAND LINE SCAN | \*--------------------------------------------------*/ #ifdef undef max_col = 78; /* -l78 */ lineup_to_parens = 1; /* -lp */ ps.ljust_decl = 0; /* -ndj */ ps.com_ind = 33; /* -c33 */ star_comment_cont = 1; /* -sc */ ps.ind_size = 8; /* -i8 */ verbose = 0; ps.decl_indent = 16; /* -di16 */ ps.local_decl_indent = -1; /* if this is not set to some nonnegative value * by an arg, we will set this equal to * ps.decl_ind */ ps.indent_parameters = 1; /* -ip */ ps.decl_com_ind = 0; /* if this is not set to some positive value * by an arg, we will set this equal to * ps.com_ind */ btype_2 = 1; /* -br */ cuddle_else = 1; /* -ce */ ps.unindent_displace = 0; /* -d0 */ ps.case_indent = 0; /* -cli0 */ format_block_comments = 1; /* -fcb */ format_col1_comments = 1; /* -fc1 */ procnames_start_line = 1; /* -psl */ proc_calls_space = 0; /* -npcs */ comment_delimiter_on_blankline = 1; /* -cdb */ ps.leave_comma = 1; /* -nbc */ #endif for (i = 1; i < argc; ++i) if (strcmp(argv[i], "-npro") == 0) break; set_defaults(); if (i >= argc) set_profile(); for (i = 1; i < argc; ++i) { /* * look thru args (if any) for changes to defaults */ if (argv[i][0] != '-') {/* no flag on parameter */ if (input == NULL) { /* we must have the input file */ in_name = argv[i]; /* remember name of input file */ input = fopen(in_name, "r"); if (input == NULL) /* check for open error */ err(1, "%s", in_name); continue; } else if (output == NULL) { /* we have the output file */ out_name = argv[i]; /* remember name of output file */ if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite * the file */ errx(1, "input and output files must be different"); } output = fopen(out_name, "w"); 
if (output == NULL) /* check for create error */ err(1, "%s", out_name); continue; } errx(1, "unknown parameter: %s", argv[i]); } else set_option(argv[i]); } /* end of for */ if (input == NULL) input = stdin; if (output == NULL) { if (troff || input == stdin) output = stdout; else { out_name = in_name; bakcopy(); } } /* Restrict input/output descriptors and enter Capsicum sandbox. */ cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); if (cap_rights_limit(fileno(output), &rights) < 0 && errno != ENOSYS) err(EXIT_FAILURE, "unable to limit rights for %s", out_name); cap_rights_init(&rights, CAP_FSTAT, CAP_READ); if (cap_rights_limit(fileno(input), &rights) < 0 && errno != ENOSYS) err(EXIT_FAILURE, "unable to limit rights for %s", in_name); if (cap_enter() < 0 && errno != ENOSYS) err(EXIT_FAILURE, "unable to enter capability mode"); if (ps.com_ind <= 1) ps.com_ind = 2; /* dont put normal comments before column 2 */ if (troff) { if (bodyf.font[0] == 0) parsefont(&bodyf, "R"); if (scomf.font[0] == 0) parsefont(&scomf, "I"); if (blkcomf.font[0] == 0) blkcomf = scomf, blkcomf.size += 2; if (boxcomf.font[0] == 0) boxcomf = blkcomf; if (stringf.font[0] == 0) parsefont(&stringf, "L"); if (keywordf.font[0] == 0) parsefont(&keywordf, "B"); writefdef(&bodyf, 'B'); writefdef(&scomf, 'C'); writefdef(&blkcomf, 'L'); writefdef(&boxcomf, 'X'); writefdef(&stringf, 'S'); writefdef(&keywordf, 'K'); } if (block_comment_max_col <= 0) block_comment_max_col = max_col; if (ps.local_decl_indent < 0) /* if not specified by user, set this */ ps.local_decl_indent = ps.decl_indent; if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; if (continuation_indent == 0) continuation_indent = ps.ind_size; fill_buffer(); /* get first batch of stuff into input buffer */ parse(semicolon); { char *p = buf_ptr; int col = 1; while (1) { if (*p == ' ') col++; else if (*p == '\t') col = ((col - 1) & ~7) + 9; else break; p++; } if (col > ps.ind_size) ps.ind_level = ps.i_l_follow = col / ps.ind_size; } if (troff) { const char *p = in_name, *beg = in_name; while (*p) if (*p++ == '/') beg = p; fprintf(output, ".Fn \"%s\"\n", beg); } /* * START OF MAIN LOOP */ while (1) { /* this is the main loop. it will go until we * reach eof */ int is_procname; type_code = lexi(); /* lexi reads one token. The actual * characters read are stored in "token". lexi * returns a code indicating the type of token */ is_procname = ps.procname[0]; /* * The following code moves everything following an if (), while (), * else, etc. up to the start of the following stmt to a buffer. This * allows proper handling of both kinds of brace placement. 
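* (That is, both the -br style, where the brace is cuddled onto the if/while line, and the -bl style, where it starts a line of its own; the btype_2 flag below selects the cuddled style.)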
*/ flushed_nl = false; while (ps.search_brace) { /* if we scanned an if(), while(), * etc., we might need to copy stuff * into a buffer we must loop, copying * stuff into save_com, until we find * the start of the stmt which follows * the if, or whatever */ switch (type_code) { case newline: ++line_no; if (sc_end != NULL) goto sw_buffer; /* dump comment, if any */ flushed_nl = true; case form_feed: break; /* form feeds and newlines found here will be * ignored */ case lbrace: /* this is a brace that starts the compound * stmt */ if (sc_end == NULL) { /* ignore buffering if a comment wasn't * stored up */ ps.search_brace = false; goto check_type; } if (btype_2) { save_com[0] = '{'; /* we either want to put the brace * right after the if */ goto sw_buffer; /* go to common code to get out of * this loop */ } case comment: /* we have a comment, so we must copy it into * the buffer */ if (!flushed_nl || sc_end != NULL) { if (sc_end == NULL) { /* if this is the first comment, we * must set up the buffer */ save_com[0] = save_com[1] = ' '; sc_end = &(save_com[2]); } else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } *sc_end++ = '/'; /* copy in start of comment */ *sc_end++ = '*'; for (;;) { /* loop until we get to the end of the comment */ *sc_end = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*sc_end++ == '*' && *buf_ptr == '/') break; /* we are at end of comment */ if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer * overflow */ diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); fflush(output); exit(1); } } *sc_end++ = '/'; /* add ending slash */ if (++buf_ptr >= buf_end) /* get past / in buffer */ fill_buffer(); break; } default: /* it is the start of a normal statement */ if (flushed_nl) /* if we flushed a newline, make sure it is * put back */ force_nl = true; if ((type_code == sp_paren && *token == 'i' && last_else && ps.else_if) || (type_code == sp_nparen && *token == 'e' && e_code != s_code && e_code[-1] == '}')) force_nl = false; if (sc_end == NULL) { /* ignore buffering if comment wasn't * saved up */ ps.search_brace = false; goto check_type; } if (force_nl) { /* if we should insert a nl here, put it into * the buffer */ force_nl = false; --line_no; /* this will be re-increased when the nl is * read from the buffer */ *sc_end++ = '\n'; *sc_end++ = ' '; if (verbose && !flushed_nl) /* print error msg if the line * was not already broken */ diag2(0, "Line broken"); flushed_nl = false; } for (t_ptr = token; *t_ptr; ++t_ptr) *sc_end++ = *t_ptr; /* copy token into temp buffer */ ps.procname[0] = 0; sw_buffer: ps.search_brace = false; /* stop looking for start of * stmt */ bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' ';/* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; break; } /* end of switch */ if (type_code != 0) /* we must make this check, just in case there * was an unexpected EOF */ type_code = lexi(); /* read another token */ /* if (ps.search_brace) ps.procname[0] = 0; */ if ((is_procname = ps.procname[0]) && flushed_nl && !procnames_start_line && ps.in_decl && type_code == ident) flushed_nl = 0; } /* end of while (search_brace) */ last_else = 0; check_type: if (type_code == 0) { /* we got eof */ if (s_lab != e_lab || s_code != e_code || s_com != e_com) /* must dump end of line */ dump_line(); if (ps.tos > 1) /* check for 
balanced braces */ diag2(1, "Stuff missing from end of file"); if (verbose) { printf("There were %d output lines and %d comments\n", ps.out_lines, ps.out_coms); printf("(Lines with comments)/(Lines with code): %6.3f\n", (1.0 * ps.com_lines) / code_lines); } fflush(output); exit(found_err); } if ( (type_code != comment) && (type_code != newline) && (type_code != preesc) && (type_code != form_feed)) { if (force_nl && (type_code != semicolon) && (type_code != lbrace || !btype_2)) { /* we should force a broken line here */ if (verbose && !flushed_nl) diag2(0, "Line broken"); flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } ps.in_stmt = true; /* turn on flag which causes an extra level of * indentation. this is turned off by a ; or * '}' */ if (s_com != e_com) { /* the turkey has embedded a comment * in a line. fix it */ *e_code++ = ' '; for (t_ptr = s_com; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } *e_code++ = ' '; *e_code = '\0'; /* null terminate code sect */ ps.want_blank = false; e_com = s_com; } } else if (type_code != comment) /* preserve force_nl thru a comment */ force_nl = false; /* cancel forced newline after newline, form * feed, etc */ /*-----------------------------------------------------*\ | do switch on type of token scanned | \*-----------------------------------------------------*/ CHECK_SIZE_CODE; switch (type_code) { /* now, decide what to do with the token */ case form_feed: /* found a form feed in line */ ps.use_ff = true; /* a form feed is treated much like a newline */ dump_line(); ps.want_blank = false; break; case newline: if (ps.last_token != comma || ps.p_l_follow > 0 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { dump_line(); ps.want_blank = false; } ++line_no; /* keep track of input line number */ break; case lparen: /* got a '(' or '[' */ ++ps.p_l_follow; /* count parens to make Healy happy */ if (ps.want_blank && *token != '[' && (ps.last_token != ident || proc_calls_space || /* offsetof (1) is never allowed a space; sizeof (2) gets * one iff -bs; all other keywords (>2) always get a space * before lparen */ (ps.keyword + Bill_Shannon > 2))) *e_code++ = ' '; ps.want_blank = false; if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && !is_procname) { /* function pointer declarations */ if (troff) { sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { indent_declaration(dec_ind, tabs_to_var); } ps.dumped_decl_indent = true; } if (!troff) *e_code++ = token[0]; ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent && ps.paren_indents[0] < 2 * ps.ind_size) ps.paren_indents[0] = 2 * ps.ind_size; if (ps.in_or_st && *token == '(' && ps.tos <= 2) { /* * this is a kluge to make sure that declarations will be * aligned right if proc decl has an explicit type on it, i.e. * "int a(x) {..." */ parse(semicolon); /* I said this was a kluge... 
*/ ps.in_or_st = false; /* turn off flag for structure decl or * initialization */ } /* parenthesized type following sizeof or offsetof is not a cast */ if (ps.keyword == 1 || ps.keyword == 2) ps.not_cast_mask |= 1 << ps.p_l_follow; break; case rparen: /* got a ')' or ']' */ rparen_count--; if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { ps.last_u_d = true; ps.cast_mask &= (1 << ps.p_l_follow) - 1; ps.want_blank = space_after_cast; } else ps.want_blank = true; ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; if (--ps.p_l_follow < 0) { ps.p_l_follow = 0; diag3(0, "Extra %c", *token); } if (e_code == s_code) /* if the paren starts the line */ ps.paren_level = ps.p_l_follow; /* then indent it */ *e_code++ = token[0]; if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if * (...), or some such */ sp_sw = false; force_nl = true;/* must force newline after if */ ps.last_u_d = true; /* inform lexi that a following * operator is unary */ ps.in_stmt = false; /* dont use stmt continuation * indentation */ parse(hd_type); /* let parser worry about if, or whatever */ } ps.search_brace = btype_2; /* this should insure that constructs * such as main(){...} and int[]{...} * have their braces put in the right * place */ break; case unary_op: /* this could be any unary operation */ if (!ps.dumped_decl_indent && ps.in_decl && !is_procname && !ps.block_init) { /* pointer declarations */ if (troff) { if (ps.want_blank) *e_code++ = ' '; sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { /* if this is a unary op in a declaration, we should * indent this token */ for (i = 0; token[i]; ++i) /* find length of token */; indent_declaration(dec_ind - i, tabs_to_var); } ps.dumped_decl_indent = true; } else if (ps.want_blank) *e_code++ = ' '; { const char *res = token; if (troff && token[0] == '-' && token[1] == '>') res = "\\(->"; for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } } ps.want_blank = false; break; case binary_op: /* any binary operation */ if (ps.want_blank) *e_code++ = ' '; { const char *res = token; if (troff) switch (token[0]) { case '<': if (token[1] == '=') res = "\\(<="; break; case '>': if (token[1] == '=') res = "\\(>="; break; case '!': if (token[1] == '=') res = "\\(!="; break; case '|': if (token[1] == '|') res = "\\(br\\(br"; else if (token[1] == 0) res = "\\(br"; break; } for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; /* move the operator */ } } ps.want_blank = true; break; case postop: /* got a trailing ++ or -- */ *e_code++ = token[0]; *e_code++ = token[1]; ps.want_blank = true; break; case question: /* got a ? 
*/ squest++; /* this will be used when a later colon * appears so we can distinguish the * ?: construct */ if (ps.want_blank) *e_code++ = ' '; *e_code++ = '?'; ps.want_blank = true; break; case casestmt: /* got word 'case' or 'default' */ scase = true; /* so we can process the later colon properly */ goto copy_id; case colon: /* got a ':' */ if (squest > 0) { /* it is part of the ?: construct */ --squest; if (ps.want_blank) *e_code++ = ' '; *e_code++ = ':'; ps.want_blank = true; break; } if (ps.in_or_st) { *e_code++ = ':'; ps.want_blank = false; break; } ps.in_stmt = false; /* seeing a label does not imply we are in a * stmt */ for (t_ptr = s_code; *t_ptr; ++t_ptr) *e_lab++ = *t_ptr; /* turn everything so far into a label */ e_code = s_code; *e_lab++ = ':'; *e_lab++ = ' '; *e_lab = '\0'; force_nl = ps.pcase = scase; /* ps.pcase will be used by * dump_line to decide how to * indent the label. force_nl * will force a case n: to be * on a line by itself */ scase = false; ps.want_blank = false; break; case semicolon: /* got a ';' */ if (ps.dec_nest == 0) ps.in_or_st = false;/* we are not in an initialization or * structure declaration */ scase = false; /* these will only need resetting in an error */ squest = 0; if (ps.last_token == rparen && rparen_count == 0) ps.in_parameter_declaration = 0; ps.cast_mask = 0; ps.not_cast_mask = 0; ps.block_init = 0; ps.block_init_level = 0; ps.just_saw_decl--; if (ps.in_decl && s_code == e_code && !ps.block_init && !ps.dumped_decl_indent) { /* indent stray semicolons in declarations */ indent_declaration(dec_ind - 1, tabs_to_var); ps.dumped_decl_indent = true; } ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level * structure declaration, we * arent any more */ if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { /* * This should be true iff there were unbalanced parens in the * stmt. It is a bit complicated, because the semicolon might * be in a for stmt */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* this is a check for an if, while, etc. with * unbalanced parens */ sp_sw = false; parse(hd_type); /* dont lose the if, or whatever */ } } *e_code++ = ';'; ps.want_blank = true; ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the * middle of a stmt */ if (!sp_sw) { /* if not if for (;;) */ parse(semicolon); /* let parser know about end of stmt */ force_nl = true;/* force newline after an end of stmt */ } break; case lbrace: /* got a '{' */ ps.in_stmt = false; /* dont indent the {} */ if (!ps.block_init) force_nl = true;/* force other stuff on same line as '{' onto * new line */ else if (ps.block_init_level <= 0) ps.block_init_level = 1; else ps.block_init_level++; if (s_code != e_code && !ps.block_init) { if (!btype_2) { dump_line(); ps.want_blank = false; } else if (ps.in_parameter_declaration && !ps.in_or_st) { ps.i_l_follow = 0; if (function_brace_split) { /* dump the line prior to the * brace ... */ dump_line(); ps.want_blank = false; } else /* add a space between the decl and brace */ ps.want_blank = true; } } if (ps.in_parameter_declaration) prefix_blankline_requested = 0; if (ps.p_l_follow > 0) { /* check for preceding unbalanced * parens */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* check for unclosed if, for, etc. 
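
The cast detection above keeps one bit per paren depth: a bit in ps.cast_mask is set when a '(' is followed by a type name, the sizeof/offsetof handling vetoes that depth through ps.not_cast_mask, and the rparen case tests "cast bit AND NOT veto bit". A reduced sketch of the scheme, with simplified names and hand-driven events in place of a real token stream:

#include <stdio.h>

/*
 * One bit per paren depth: cast_mask marks "this '(' was followed by a
 * type name, so it may be a cast"; not_cast_mask vetoes depths opened
 * by sizeof or offsetof.
 */
int
main(void)
{
        unsigned int cast_mask = 0, not_cast_mask = 0;
        int depth;

        depth = 1;                      /* saw '(' of "sizeof(int)" */
        not_cast_mask |= 1u << depth;   /* sizeof: veto this depth */
        cast_mask |= 1u << depth;       /* "int" looked cast-like */

        if (cast_mask & (1u << depth) & ~not_cast_mask)
                printf("depth %d closes a cast\n", depth);
        else
                printf("depth %d is not a cast\n", depth);

        /* on ')', drop all bits at or above this depth */
        cast_mask &= (1u << depth) - 1;
        not_cast_mask &= (1u << depth) - 1;
        return (0);
}
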
*/ sp_sw = false; parse(hd_type); ps.ind_level = ps.i_l_follow; } } if (s_code == e_code) ps.ind_stmt = false; /* dont put extra indentation on line * with '{' */ if (ps.in_decl && ps.in_or_st) { /* this is either a structure * declaration or an init */ di_stack[ps.dec_nest++] = dec_ind; /* ? dec_ind = 0; */ } else { ps.decl_on_line = false; /* we can't be in the middle of * a declaration, so don't do * special indentation of * comments */ if (blanklines_after_declarations_at_proctop && ps.in_parameter_declaration) postfix_blankline_requested = 1; ps.in_parameter_declaration = 0; } dec_ind = 0; parse(lbrace); /* let parser know about this */ if (ps.want_blank) /* put a blank before '{' if '{' is not at * start of line */ *e_code++ = ' '; ps.want_blank = false; *e_code++ = '{'; ps.just_saw_decl = 0; break; case rbrace: /* got a '}' */ if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be * omitted in * declarations */ parse(semicolon); if (ps.p_l_follow) {/* check for unclosed if, for, else. */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; sp_sw = false; } ps.just_saw_decl = 0; ps.block_init_level--; if (s_code != e_code && !ps.block_init) { /* '}' must be first on * line */ if (verbose) diag2(0, "Line broken"); dump_line(); } *e_code++ = '}'; ps.want_blank = true; ps.in_stmt = ps.ind_stmt = false; if (ps.dec_nest > 0) { /* we are in multi-level structure * declaration */ dec_ind = di_stack[--ps.dec_nest]; if (ps.dec_nest == 0 && !ps.in_parameter_declaration) ps.just_saw_decl = 2; ps.in_decl = true; } prefix_blankline_requested = 0; parse(rbrace); /* let parser know about this */ ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead && ps.il[ps.tos] >= ps.ind_level; if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) postfix_blankline_requested = 1; break; case swstmt: /* got keyword "switch" */ sp_sw = true; hd_type = swstmt; /* keep this for when we have seen the * expression */ goto copy_id; /* go move the token into buffer */ case sp_paren: /* token is if, while, for */ sp_sw = true; /* the interesting stuff is done after the * expression is scanned */ hd_type = (*token == 'i' ? ifstmt : (*token == 'w' ? whilestmt : forstmt)); /* * remember the type of header for later use by parser */ goto copy_id; /* copy the token into line */ case sp_nparen: /* got else, do */ ps.in_stmt = false; if (*token == 'e') { if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { if (verbose) diag2(0, "Line broken"); dump_line();/* make sure this starts a line */ ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 1; parse(elselit); } else { if (e_code != s_code) { /* make sure this starts a line */ if (verbose) diag2(0, "Line broken"); dump_line(); ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 0; parse(dolit); } goto copy_id; /* move the token into line */ case decl: /* we have a declaration type (int, register, * etc.) 
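
Just below, the decl case chooses a declaration column dec_ind, and indent_declaration() (later in this file) pads the output to that column, rounding with (pos & ~7) + 8 because a hardware tab stop falls every 8 columns: tabs are emitted while a full stop still fits before the target, then spaces finish the job. A self-contained sketch of that padding rule; pad_to_column() is a hypothetical helper, with 0-based columns and 8-column tabs assumed.

#include <stdio.h>

/*
 * Pad from column 'pos' up to 'target' using 8-column tabs first,
 * then spaces; mirrors the (pos & ~7) + 8 rounding in
 * indent_declaration().
 */
static void
pad_to_column(int pos, int target, int use_tabs)
{
        if (use_tabs)
                while ((pos & ~7) + 8 <= target) {
                        putchar('\t');
                        pos = (pos & ~7) + 8;
                }
        while (pos < target) {
                putchar(' ');
                pos++;
        }
}

int
main(void)
{
        printf("int");
        pad_to_column(3, 16, 1);        /* land the name at column 16 */
        printf("x;\n");
        return (0);
}
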
*/ parse(decl); /* let parser worry about indentation */ if (ps.last_token == rparen && ps.tos <= 1) { ps.in_parameter_declaration = 1; if (s_code != e_code) { dump_line(); ps.want_blank = 0; } } if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { ps.ind_level = ps.i_l_follow = 1; ps.ind_stmt = 0; } ps.in_or_st = true; /* this might be a structure or initialization * declaration */ ps.in_decl = ps.decl_on_line = true; if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) ps.just_saw_decl = 2; prefix_blankline_requested = 0; for (i = 0; token[i++];); /* get length of token */ if (ps.ind_level == 0 || ps.dec_nest > 0) { /* global variable or struct member in local variable */ dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); } else { /* local variable */ dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); } goto copy_id; case ident: /* got an identifier or constant */ if (ps.in_decl) { /* if we are in a declaration, we must indent * identifier */ if (is_procname == 0 || !procnames_start_line) { if (!ps.block_init && !ps.dumped_decl_indent) { if (troff) { if (ps.want_blank) *e_code++ = ' '; sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); e_code += strlen(e_code); } else indent_declaration(dec_ind, tabs_to_var); ps.dumped_decl_indent = true; ps.want_blank = false; } } else { if (ps.want_blank) *e_code++ = ' '; ps.want_blank = false; if (dec_ind && s_code != e_code) { *e_code = '\0'; dump_line(); } dec_ind = 0; } } else if (sp_sw && ps.p_l_follow == 0) { sp_sw = false; force_nl = true; ps.last_u_d = true; ps.in_stmt = false; parse(hd_type); } copy_id: if (ps.want_blank) *e_code++ = ' '; if (troff && ps.keyword) { e_code = chfont(&bodyf, &keywordf, e_code); for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = keywordf.allcaps && islower(*t_ptr) ? 
toupper(*t_ptr) : *t_ptr; } e_code = chfont(&keywordf, &bodyf, e_code); } else for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } ps.want_blank = true; break; + case strpfx: + if (ps.want_blank) + *e_code++ = ' '; + for (t_ptr = token; *t_ptr; ++t_ptr) { + CHECK_SIZE_CODE; + *e_code++ = *t_ptr; + } + ps.want_blank = false; + break; + case period: /* treat a period kind of like a binary * operation */ *e_code++ = '.'; /* move the period into line */ ps.want_blank = false; /* dont put a blank after a period */ break; case comma: ps.want_blank = (s_code != e_code); /* only put blank after comma * if comma does not start the * line */ if (ps.in_decl && is_procname == 0 && !ps.block_init && !ps.dumped_decl_indent) { /* indent leading commas and not the actual identifiers */ indent_declaration(dec_ind - 1, tabs_to_var); ps.dumped_decl_indent = true; } *e_code++ = ','; if (ps.p_l_follow == 0) { if (ps.block_init_level <= 0) ps.block_init = 0; if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) force_nl = true; } break; case preesc: /* got the character '#' */ if ((s_com != e_com) || (s_lab != e_lab) || (s_code != e_code)) dump_line(); *e_lab++ = '#'; /* move whole line to 'label' buffer */ { int in_comment = 0; int com_start = 0; char quote = 0; int com_end = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') { buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } while (*buf_ptr != '\n' || (in_comment && !had_eof)) { CHECK_SIZE_LAB; *e_lab = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); switch (*e_lab++) { case BACKSLASH: if (troff) *e_lab++ = BACKSLASH; if (!in_comment) { *e_lab++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } break; case '/': if (*buf_ptr == '*' && !in_comment && !quote) { in_comment = 1; *e_lab++ = *buf_ptr++; com_start = e_lab - s_lab - 2; } break; case '"': if (quote == '"') quote = 0; break; case '\'': if (quote == '\'') quote = 0; break; case '*': if (*buf_ptr == '/' && in_comment) { in_comment = 0; *e_lab++ = *buf_ptr++; com_end = e_lab - s_lab; } break; } } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; if (e_lab - s_lab == com_end && bp_save == NULL) { /* comment on preprocessor line */ if (sc_end == NULL) /* if this is the first comment, we * must set up the buffer */ sc_end = &(save_com[0]); else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } bcopy(s_lab + com_start, sc_end, com_end - com_start); sc_end += com_end - com_start; if (sc_end >= &save_com[sc_size]) abort(); e_lab = s_lab + com_start; while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' '; /* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; } *e_lab = '\0'; /* null terminate line */ ps.pcase = false; } if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ if ((size_t)ifdef_level < nitems(state_stack)) { match_state[ifdef_level].tos = -1; state_stack[ifdef_level++] = ps; } else diag2(1, "#if stack overflow"); } else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ if (ifdef_level <= 0) diag2(1, s_lab[3] == 'i' ? 
"Unmatched #elif" : "Unmatched #else"); else { match_state[ifdef_level - 1] = ps; ps = state_stack[ifdef_level - 1]; } } else if (strncmp(s_lab, "#endif", 6) == 0) { if (ifdef_level <= 0) diag2(1, "Unmatched #endif"); else ifdef_level--; } else { struct directives { int size; const char *string; } recognized[] = { {7, "include"}, {6, "define"}, {5, "undef"}, {4, "line"}, {5, "error"}, {6, "pragma"} }; int d = nitems(recognized); while (--d >= 0) if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) break; if (d < 0) { diag2(1, "Unrecognized cpp directive"); break; } } if (blanklines_around_conditional_compilation) { postfix_blankline_requested++; n_real_blanklines = 0; } else { postfix_blankline_requested = 0; prefix_blankline_requested = 0; } break; /* subsequent processing of the newline * character will cause the line to be printed */ case comment: /* we have gotten a / followed by * this is a biggie */ if (flushed_nl) { /* we should force a broken line here */ dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } pr_comment(); break; } /* end of big switch stmt */ *e_code = '\0'; /* make sure code section is null terminated */ if (type_code != comment && type_code != newline && type_code != preesc) ps.last_token = type_code; } /* end of main while (1) loop */ } /* * copy input file to backup file if in_name is /blah/blah/blah/file, then * backup file will be ".Bfile" then make the backup file the input and * original input file the output */ static void bakcopy(void) { int n, bakchn; char buff[8 * 1024]; const char *p; /* construct file name .Bfile */ for (p = in_name; *p; p++); /* skip to end of string */ while (p > in_name && *p != '/') /* find last '/' */ p--; if (*p == '/') p++; sprintf(bakfile, "%s.BAK", p); /* copy in_name to backup file */ bakchn = creat(bakfile, 0600); if (bakchn < 0) err(1, "%s", bakfile); while ((n = read(fileno(input), buff, sizeof(buff))) > 0) if (write(bakchn, buff, n) != n) err(1, "%s", bakfile); if (n < 0) err(1, "%s", in_name); close(bakchn); fclose(input); /* re-open backup file as the input file */ input = fopen(bakfile, "r"); if (input == NULL) err(1, "%s", bakfile); /* now the original input file will be the output */ output = fopen(in_name, "w"); if (output == NULL) { unlink(bakfile); err(1, "%s", in_name); } } static void indent_declaration(int cur_dec_ind, int tabs_to_var) { int pos = e_code - s_code; char *startpos = e_code; /* * get the tab math right for indentations that are not multiples of 8 */ if ((ps.ind_level * ps.ind_size) % 8 != 0) { pos += (ps.ind_level * ps.ind_size) % 8; cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; } if (tabs_to_var) while ((pos & ~7) + 8 <= cur_dec_ind) { CHECK_SIZE_CODE; *e_code++ = '\t'; pos = (pos & ~7) + 8; } while (pos < cur_dec_ind) { CHECK_SIZE_CODE; *e_code++ = ' '; pos++; } if (e_code == startpos && ps.want_blank) { *e_code++ = ' '; ps.want_blank = false; } } Index: projects/clang391-import/usr.bin/indent/indent_codes.h =================================================================== --- projects/clang391-import/usr.bin/indent/indent_codes.h (revision 309262) +++ projects/clang391-import/usr.bin/indent/indent_codes.h (revision 309263) @@ -1,70 +1,71 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. 
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)indent_codes.h	8.1 (Berkeley) 6/6/93
 * $FreeBSD$
 */

#define newline		1
#define lparen		2
#define rparen		3
#define unary_op	4
#define binary_op	5
#define postop		6
#define question	7
#define casestmt	8
#define colon		9
#define semicolon	10
#define lbrace		11
#define rbrace		12
#define ident		13
#define comma		14
#define comment		15
#define swstmt		16
#define preesc		17
#define form_feed	18
#define decl		19
#define sp_paren	20
#define sp_nparen	21
#define ifstmt		22
#define whilestmt	23
#define forstmt		24
#define stmt		25
#define stmtl		26
#define elselit		27
#define dolit		28
#define dohead		29
#define ifhead		30
#define elsehead	31
#define period		32
+#define strpfx		33
Index: projects/clang391-import/usr.bin/indent/lexi.c
===================================================================
--- projects/clang391-import/usr.bin/indent/lexi.c	(revision 309262)
+++ projects/clang391-import/usr.bin/indent/lexi.c	(revision 309263)
@@ -1,627 +1,632 @@
/*-
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Here we have the token scanner for indent. It scans off one token and puts * it in the global variable "token". It returns a code, indicating the type * of token scanned. */ #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" #define alphanum 1 #define opchar 3 struct templ { const char *rwd; int rwcode; }; /* * This table has to be sorted alphabetically, because it'll be used in binary * search. For the same reason, string must be the first thing in struct templ. */ struct templ specials[] = { {"break", 9}, {"case", 8}, {"char", 4}, {"const", 4}, {"default", 8}, {"do", 6}, {"double", 4}, {"else", 6}, {"enum", 3}, {"extern", 4}, {"float", 4}, {"for", 5}, {"global", 4}, {"goto", 9}, {"if", 5}, {"int", 4}, {"long", 4}, {"offsetof", 1}, {"register", 4}, {"return", 9}, {"short", 4}, {"sizeof", 2}, {"static", 4}, {"struct", 3}, {"switch", 7}, {"typedef", 4}, {"union", 3}, {"unsigned", 4}, {"void", 4}, {"volatile", 4}, {"while", 5} }; const char **typenames; int typename_count; int typename_top = -1; char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; static int strcmp_type(const void *e1, const void *e2) { return (strcmp(e1, *(const char * const *)e2)); } int lexi(void) { int unary_delim; /* this is set to 1 if the current token * forces a following operator to be unary */ static int last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ e_token = s_token; /* point to start of place to save token */ unary_delim = false; ps.col_1 = ps.last_nl; /* tell world that this token started in * column 1 iff the last thing scanned was nl */ ps.last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ps.col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } /* Scan an 
alphanumeric token */ if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { /* * we have a character or number */ struct templ *p; if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { int seendot = 0, seenexp = 0, seensfx = 0; if (*buf_ptr == '0' && (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; while (isxdigit(*buf_ptr)) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; } } else while (1) { if (*buf_ptr == '.') { if (seendot) break; else seendot++; } CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (!isdigit(*buf_ptr) && *buf_ptr != '.') { if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) break; else { seenexp++; seendot++; CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (*buf_ptr == '+' || *buf_ptr == '-') *e_token++ = *buf_ptr++; } } } while (1) { if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; seensfx |= 1; continue; } if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { CHECK_SIZE_TOKEN; if (buf_ptr[1] == buf_ptr[0]) *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; seensfx |= 2; continue; } break; } } else while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { buf_ptr += 2; if (buf_ptr >= buf_end) fill_buffer(); } else break; } CHECK_SIZE_TOKEN; /* copy it over */ *e_token++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } *e_token++ = '\0'; + + if (s_token[0] == 'L' && s_token[1] == '\0' && + (*buf_ptr == '"' || *buf_ptr == '\'')) + return (strpfx); + while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } ps.keyword = 0; if (l_struct && !ps.p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token * should be treated as a declaration */ l_struct = false; last_code = ident; ps.last_u_d = true; return (decl); } ps.last_u_d = l_struct; /* Operator after identifier is binary * unless last token was 'struct' */ l_struct = false; last_code = ident; /* Remember that this is the code we will * return */ p = bsearch(s_token, specials, sizeof(specials) / sizeof(specials[0]), sizeof(specials[0]), strcmp_type); if (p == NULL) { /* not a special keyword... */ char *u; /* ... 
so maybe a type_t or a typedef */ if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && strcmp(u, "_t") == 0) || (typename_top >= 0 && bsearch(s_token, typenames, typename_top + 1, sizeof(typenames[0]), strcmp_type))) { ps.keyword = 4; /* a type name */ ps.last_u_d = true; goto found_typename; } } else { /* we have a keyword */ ps.keyword = p->rwcode; ps.last_u_d = true; switch (p->rwcode) { case 7: /* it is a switch */ return (swstmt); case 8: /* a case or default */ return (casestmt); case 3: /* a "struct" */ /* * Next time around, we will want to know that we have had a * 'struct' */ l_struct = true; /* FALLTHROUGH */ case 4: /* one of the declaration keywords */ found_typename: if (ps.p_l_follow) { /* inside parens: cast, param list, offsetof or sizeof */ ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask; break; } last_code = decl; return (decl); case 5: /* if, while, for */ return (sp_paren); case 6: /* do, else */ return (sp_nparen); default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { char *tp = buf_ptr; while (tp < buf_end) if (*tp++ == ')' && (*tp == ';' || *tp == ',')) goto not_proc; strncpy(ps.procname, token, sizeof ps.procname - 1); ps.in_parameter_declaration = 1; rparen_count = 1; not_proc:; } /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been * typedefd */ if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') && !ps.p_l_follow && !ps.block_init && (ps.last_token == rparen || ps.last_token == semicolon || ps.last_token == decl || ps.last_token == lbrace || ps.last_token == rbrace)) { ps.keyword = 4; /* a type name */ ps.last_u_d = true; last_code = decl; return decl; } if (last_code == decl) /* if this is a declared variable, then * following sign is unary */ ps.last_u_d = true; /* will make "int a -1" work */ last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /* Scan a non-alphanumeric token */ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is * moved here */ *e_token = '\0'; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { case '\n': unary_delim = ps.last_u_d; ps.last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 
0 : newline); /* * if data has been exhausted, the newline is a dummy, and we should * return code to stop */ break; case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; if (troff) { e_token[-1] = '`'; if (qchar == '"') *e_token++ = '`'; e_token = chfont(&bodyf, &stringf, e_token); } do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { diag2(1, "Unterminated literal"); goto stop_lit; } CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, * since CHECK_SIZE guarantees that there * are at least 5 entries left */ *e_token = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_token == BACKSLASH) { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; if (troff) { *++e_token = BACKSLASH; if (*buf_ptr == BACKSLASH) *++e_token = BACKSLASH; } *++e_token = *buf_ptr++; ++e_token; /* we must increment this again because we * copied two chars */ if (buf_ptr >= buf_end) fill_buffer(); } else break; /* we copied one character */ } /* end of while (1) */ } while (*e_token++ != qchar); if (troff) { e_token = chfont(&stringf, &bodyf, e_token - 1); if (qchar == '"') *e_token++ = '\''; } stop_lit: code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = ps.last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; /* * if (ps.in_or_st) ps.block_init = 1; */ /* ? code = ps.block_init ? lparen : lbrace; */ code = lbrace; break; case ('}'): unary_delim = true; /* ? code = ps.block_init ? rparen : rbrace; */ code = rbrace; break; case 014: /* a form feed */ unary_delim = ps.last_u_d; ps.last_nl = true; /* remember this so we can set 'ps.col_1' * right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *e_token++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ if (last_code == ident || last_code == rparen) { code = (ps.last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '=') /* check for operator += */ *e_token++ = *buf_ptr++; else if (*buf_ptr == '>') { /* check for operator -> */ *e_token++ = *buf_ptr++; if (!pointer_as_binop) { unary_delim = false; code = unary_op; ps.want_blank = false; } } break; /* buffer overflow will be checked at end of * switch */ case '=': if (ps.in_or_st) ps.block_init = 1; #ifdef undef if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ e_token[-1] = *buf_ptr++; if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) *e_token++ = *buf_ptr++; *e_token++ = '='; /* Flip =+ to += */ *e_token = 0; } #else if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } #endif code = binary_op; unary_delim = true; break; /* can drop thru!!! 
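
The new strpfx token is the heart of this revision: the hunk added to lexi() above makes the scanner return a distinct code for a lone L that is immediately followed by a quote, and the matching case added to indent.c emits it with no trailing blank, so wide literals such as L"..." and L'x' stay glued together. The test itself is tiny; a standalone sketch, with is_string_prefix() as a hypothetical stand-in for the inline check:

#include <stdio.h>
#include <string.h>

/*
 * Return 1 when the scanned token is the wide-literal prefix: a lone
 * 'L' whose next input character opens a string or char constant
 * (cf. the strpfx check added to lexi()).
 */
static int
is_string_prefix(const char *token, char next)
{
        return (strcmp(token, "L") == 0 && (next == '"' || next == '\''));
}

int
main(void)
{
        printf("%d\n", is_string_prefix("L", '"'));     /* 1: L"..." */
        printf("%d\n", is_string_prefix("L", 'x'));     /* 0: ident then ident */
        return (0);
}
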
*/ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } if (*buf_ptr == '=') *e_token++ = *buf_ptr++; code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ *e_token++ = '*'; if (++buf_ptr >= buf_end) fill_buffer(); code = comment; unary_delim = ps.last_u_d; break; } while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (code != newline) { l_struct = false; last_code = code; } if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); ps.last_u_d = unary_delim; *e_token = '\0'; /* null terminate the token */ return (code); } void alloc_typenames(void) { typenames = (const char **)malloc(sizeof(typenames[0]) * (typename_count = 16)); if (typenames == NULL) err(1, NULL); } void add_typename(const char *key) { int comparison; const char *copy; if (typename_top + 1 >= typename_count) { typenames = realloc((void *)typenames, sizeof(typenames[0]) * (typename_count *= 2)); if (typenames == NULL) err(1, NULL); } if (typename_top == -1) typenames[++typename_top] = copy = strdup(key); else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { /* take advantage of sorted input */ if (comparison == 0) /* remove duplicates */ return; typenames[++typename_top] = copy = strdup(key); } else { int p; for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) /* find place for the new key */; if (comparison == 0) /* remove duplicates */ return; memmove(&typenames[p + 1], &typenames[p], sizeof(typenames[0]) * (++typename_top - p)); typenames[p] = copy = strdup(key); } if (copy == NULL) err(1, NULL); } Index: projects/clang391-import/usr.bin/indent/parse.c =================================================================== --- projects/clang391-import/usr.bin/indent/parse.c (revision 309262) +++ projects/clang391-import/usr.bin/indent/parse.c (revision 309263) @@ -1,337 +1,337 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void reduce(void); void parse(int tk) /* tk: the code for the construct scanned */ { int i; #ifdef debug printf("%2d - %s\n", tk, token); #endif while (ps.p_stack[ps.tos] == ifhead && tk != elselit) { /* true if we have an if without an else */ ps.p_stack[ps.tos] = stmt; /* apply the if(..) stmt ::= stmt * reduction */ reduce(); /* see if this allows any reduction */ } switch (tk) { /* go on and figure out what to do with the * input */ case decl: /* scanned a declaration word */ ps.search_brace = btype_2; /* indicate that following brace should be on same line */ if (ps.p_stack[ps.tos] != decl) { /* only put one declaration * onto stack */ break_comma = true; /* while in declaration, newline should be * forced after comma */ ps.p_stack[++ps.tos] = decl; ps.il[ps.tos] = ps.i_l_follow; if (ps.ljust_decl) {/* only do if we want left justified * declarations */ ps.ind_level = 0; for (i = ps.tos - 1; i > 0; --i) if (ps.p_stack[i] == decl) ++ps.ind_level; /* indentation is number of * declaration levels deep we are */ ps.i_l_follow = ps.ind_level; } } break; case ifstmt: /* scanned if (...) */ if (ps.p_stack[ps.tos] == elsehead && ps.else_if) /* "else if ..." */ ps.i_l_follow = ps.il[ps.tos]; /* the rest is the same as for dolit and forstmt */ case dolit: /* 'do' */ case forstmt: /* for (...) */ ps.p_stack[++ps.tos] = tk; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; ++ps.i_l_follow; /* subsequent statements should be indented 1 */ ps.search_brace = btype_2; break; case lbrace: /* scanned { */ break_comma = false; /* don't break comma in an initial list */ if (ps.p_stack[ps.tos] == stmt || ps.p_stack[ps.tos] == decl || ps.p_stack[ps.tos] == stmtl) ++ps.i_l_follow; /* it is a random, isolated stmt group or a * declaration */ else { if (s_code == e_code) { /* * only do this if there is nothing on the line */ --ps.ind_level; /* * it is a group as part of a while, for, etc. */ if (ps.p_stack[ps.tos] == swstmt && ps.case_indent >= 1) --ps.ind_level; /* * for a switch, brace should be two levels out from the code */ } } ps.p_stack[++ps.tos] = lbrace; ps.il[ps.tos] = ps.ind_level; ps.p_stack[++ps.tos] = stmt; /* allow null stmt between braces */ ps.il[ps.tos] = ps.i_l_follow; break; case whilestmt: /* scanned while (...) 
*/ if (ps.p_stack[ps.tos] == dohead) { /* it is matched with do stmt */ ps.ind_level = ps.i_l_follow = ps.il[ps.tos]; ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; } else { /* it is a while loop */ ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.i_l_follow; ++ps.i_l_follow; ps.search_brace = btype_2; } break; case elselit: /* scanned an else */ if (ps.p_stack[ps.tos] != ifhead) diag2(1, "Unmatched 'else'"); else { ps.ind_level = ps.il[ps.tos]; /* indentation for else should * be same as for if */ ps.i_l_follow = ps.ind_level + 1; /* everything following should * be in 1 level */ ps.p_stack[ps.tos] = elsehead; /* remember if with else */ ps.search_brace = btype_2 | ps.else_if; } break; case rbrace: /* scanned a } */ /* stack should have or */ if (ps.p_stack[ps.tos - 1] == lbrace) { ps.ind_level = ps.i_l_follow = ps.il[--ps.tos]; ps.p_stack[ps.tos] = stmt; } else diag2(1, "Statement nesting error"); break; case swstmt: /* had switch (...) */ ps.p_stack[++ps.tos] = swstmt; ps.cstk[ps.tos] = case_ind; /* save current case indent level */ ps.il[ps.tos] = ps.i_l_follow; case_ind = ps.i_l_follow + ps.case_indent; /* cases should be one * level down from * switch */ ps.i_l_follow += ps.case_indent + 1; /* statements should be two * levels in */ ps.search_brace = btype_2; break; case semicolon: /* this indicates a simple stmt */ break_comma = false; /* turn off flag to break after commas in a * declaration */ ps.p_stack[++ps.tos] = stmt; ps.il[ps.tos] = ps.ind_level; break; default: /* this is an error */ diag2(1, "Unknown code to parser"); return; } /* end of switch */ - if (ps.tos >= STACKSIZE) + if (ps.tos >= STACKSIZE - 1) errx(1, "Parser stack overflow"); reduce(); /* see if any reduction can be done */ #ifdef debug for (i = 1; i <= ps.tos; ++i) printf("(%d %d)", ps.p_stack[i], ps.il[i]); printf("\n"); #endif return; } /* * NAME: reduce * * FUNCTION: Implements the reduce part of the parsing algorithm * * ALGORITHM: The following reductions are done. Reductions are repeated * until no more are possible. * * Old TOS New TOS * * * do "dostmt" * if "ifstmt" * switch * decl * "ifelse" * for * while * "dostmt" while * * On each reduction, ps.i_l_follow (the indentation for the following line) * is set to the indentation level associated with the old TOS. * * PARAMETERS: None * * RETURNS: Nothing * * GLOBALS: ps.cstk ps.i_l_follow = ps.il ps.p_stack = ps.tos = * * CALLS: None * * CALLED BY: parse * * HISTORY: initial coding November 1976 D A Willcox of CAC * */ /*----------------------------------------------*\ | REDUCTION PHASE | \*----------------------------------------------*/ static void reduce(void) { int i; for (;;) { /* keep looping until there is nothing left to * reduce */ switch (ps.p_stack[ps.tos]) { case stmt: switch (ps.p_stack[ps.tos - 1]) { case stmt: case stmtl: /* stmtl stmt or stmt stmt */ ps.p_stack[--ps.tos] = stmtl; break; case dolit: /* */ ps.p_stack[--ps.tos] = dohead; ps.i_l_follow = ps.il[ps.tos]; break; case ifstmt: /* */ ps.p_stack[--ps.tos] = ifhead; for (i = ps.tos - 1; ( ps.p_stack[i] != stmt && ps.p_stack[i] != stmtl && ps.p_stack[i] != lbrace ); --i); ps.i_l_follow = ps.il[i]; /* * for the time being, we will assume that there is no else on * this if, and set the indentation level accordingly. 
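
The parse.c hunk above changes the overflow test from ps.tos >= STACKSIZE to ps.tos >= STACKSIZE - 1. The reason is push-before-check: every case writes through ps.p_stack[++ps.tos] before the bounds test runs, so testing against STACKSIZE fires only after an out-of-bounds write has already happened, while testing against STACKSIZE - 1 keeps the last write in range. A reduced sketch of the pattern, with hypothetical names and a deliberately tiny stack:

#include <stdio.h>
#include <stdlib.h>

#define STACKSIZE 8             /* deliberately tiny */

static int p_stack[STACKSIZE];
static int tos;                 /* slot 0 stays a sentinel */

/*
 * The write lands before the bounds test, so the test must keep one
 * slot of headroom -- hence ">= STACKSIZE - 1", not ">= STACKSIZE".
 */
static void
push(int v)
{
        p_stack[++tos] = v;
        if (tos >= STACKSIZE - 1) {
                fprintf(stderr, "Parser stack overflow\n");
                exit(1);
        }
}

int
main(void)
{
        for (;;)
                push(42);       /* exits through the overflow check */
}
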
If an * else is scanned, it will be fixed up later */ break; case swstmt: /* */ case_ind = ps.cstk[ps.tos - 1]; /* FALLTHROUGH */ case decl: /* finish of a declaration */ case elsehead: /* < else> */ case forstmt: /* */ case whilestmt: /* */ ps.p_stack[--ps.tos] = stmt; ps.i_l_follow = ps.il[ps.tos]; break; default: /* */ return; } /* end of section for on top of stack */ break; case whilestmt: /* while (...) on top */ if (ps.p_stack[ps.tos - 1] == dohead) { /* it is termination of a do while */ ps.tos -= 2; break; } else return; default: /* anything else on top */ return; } } } Index: projects/clang391-import/usr.bin/indent/pr_comment.c =================================================================== --- projects/clang391-import/usr.bin/indent/pr_comment.c (revision 309262) +++ projects/clang391-import/usr.bin/indent/pr_comment.c (revision 309263) @@ -1,335 +1,339 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)pr_comment.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "indent_globs.h" #include "indent.h" /* * NAME: * pr_comment * * FUNCTION: * This routine takes care of scanning and printing comments. * * ALGORITHM: * 1) Decide where the comment should be aligned, and if lines should * be broken. * 2) If lines should not be broken and filled, just copy up to end of * comment. * 3) If lines should be filled, then scan thru input_buffer copying * characters to com_buf. Remember where the last blank, tab, or * newline was. When line is filled, print up to last blank and * continue copying. 
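
The pr_comment.c change below tightens the one-liner test: the break delimiter is now suppressed only when the whole /* ... */ comment, measured from ps.com_col with count_spaces_until(), still ends at or before adj_max_col, instead of for every one-line comment regardless of width. A standalone sketch of such a fits-on-one-line check; end_column() is a hypothetical stand-in for count_spaces_until(), assuming 1-based columns and 8-column tabs:

#include <stdio.h>
#include <string.h>

/* Column reached after printing s[0..len) starting at column 'col'. */
static int
end_column(int col, const char *s, size_t len)
{
        for (size_t i = 0; i < len; i++) {
                if (s[i] == '\t')
                        col = ((col + 7) & ~7) + 1;     /* next tab stop */
                else
                        col++;
        }
        return (col);
}

int
main(void)
{
        const char *body = "a short remark */";
        int com_col = 33, max_col = 78;

        /* keep it a one-liner only if the closing */
        /* delimiter stays inside the margin       */
        if (end_column(com_col, body, strlen(body)) <= max_col)
                printf("fits: no break delimiter needed\n");
        else
                printf("too long: break the delimiter onto its own line\n");
        return (0);
}
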
* * HISTORY: * November 1976 D A Willcox of CAC Initial coding * 12/6/76 D A Willcox of CAC Modification to handle * UNIX-style comments * */ /* * this routine processes comments. It makes an attempt to keep comments from * going over the max line length. If a line is too long, it moves everything * from the last blank to the next comment line. Blanks and tabs from the * beginning of the input line are removed */ void pr_comment(void) { int now_col; /* column we are in now */ int adj_max_col; /* Adjusted max_col for when we decide to * spill comments over the right margin */ char *last_bl; /* points to the last blank in the output * buffer */ char *t_ptr; /* used for moving string */ int break_delim = comment_delimiter_on_blankline; int l_just_saw_decl = ps.just_saw_decl; adj_max_col = max_col; ps.just_saw_decl = 0; last_bl = NULL; /* no blanks found so far */ ps.box_com = false; /* at first, assume that we are not in * a boxed comment or some other * comment that should not be touched */ ++ps.out_coms; /* keep track of number of comments */ /* Figure where to align and how to treat the comment */ if (ps.col_1 && !format_col1_comments) { /* if comment starts in column * 1 it should not be touched */ ps.box_com = true; break_delim = false; ps.com_col = 1; } else { if (*buf_ptr == '-' || *buf_ptr == '*' || (*buf_ptr == '\n' && !format_block_comments)) { ps.box_com = true; /* A comment with a '-' or '*' immediately * after the /+* is assumed to be a boxed * comment. A comment with a newline * immediately after the /+* is assumed to * be a block comment and is treated as a * box comment unless format_block_comments * is nonzero (the default). */ break_delim = false; } if ( /* ps.bl_line && */ (s_lab == e_lab) && (s_code == e_code)) { /* klg: check only if this line is blank */ /* * If this (*and previous lines are*) blank, dont put comment way * out at left */ ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; adj_max_col = block_comment_max_col; if (ps.com_col <= 1) ps.com_col = 1 + !format_col1_comments; } else { int target_col; break_delim = false; if (s_code != e_code) target_col = count_spaces(compute_code_target(), s_code); else { target_col = 1; if (s_lab != e_lab) target_col = count_spaces(compute_label_target(), s_lab); } ps.com_col = ps.decl_on_line || ps.ind_level == 0 ? ps.decl_com_ind : ps.com_ind; if (ps.com_col < target_col) ps.com_col = ((target_col + 7) & ~7) + 1; if (ps.com_col + 24 > adj_max_col) adj_max_col = ps.com_col + 24; } } if (ps.box_com) { buf_ptr[-2] = 0; ps.n_comment_delta = 1 - count_spaces(1, in_buffer); buf_ptr[-2] = '/'; } else { ps.n_comment_delta = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') buf_ptr++; } ps.comment_delta = 0; *e_com++ = '/'; /* put '/' followed by '*' into buffer */ *e_com++ = '*'; if (*buf_ptr != ' ' && !ps.box_com) *e_com++ = ' '; - /* Don't put a break delimiter if this comment is a one-liner */ - for (t_ptr = buf_ptr; *t_ptr != '\0' && *t_ptr != '\n'; t_ptr++) { - if (t_ptr >= buf_end) - fill_buffer(); - if (t_ptr[0] == '*' && t_ptr[1] == '/') { - break_delim = false; - break; + /* + * Don't put a break delimiter if this is a one-liner that won't wrap. 
+ */ + if (break_delim) + for (t_ptr = buf_ptr; *t_ptr != '\0' && *t_ptr != '\n'; t_ptr++) { + if (t_ptr >= buf_end) + fill_buffer(); + if (t_ptr[0] == '*' && t_ptr[1] == '/') { + if (adj_max_col >= count_spaces_until(ps.com_col, buf_ptr, t_ptr + 2)) + break_delim = false; + break; + } } - } if (break_delim) { char *t = e_com; e_com = s_com + 2; *e_com = 0; if (blanklines_before_blockcomments) prefix_blankline_requested = 1; dump_line(); e_com = s_com = t; if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } if (troff) adj_max_col = 80; /* Start to copy the comment */ while (1) { /* this loop will go until the comment is * copied */ CHECK_SIZE_COM; switch (*buf_ptr) { /* this checks for various spcl cases */ case 014: /* check for a form feed */ if (!ps.box_com) { /* in a text comment, break the line here */ ps.use_ff = true; /* fix so dump_line uses a form feed */ dump_line(); last_bl = NULL; if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; while (*++buf_ptr == ' ' || *buf_ptr == '\t') ; } else { if (++buf_ptr >= buf_end) fill_buffer(); *e_com++ = 014; } break; case '\n': if (had_eof) { /* check for unexpected eof */ printf("Unterminated comment\n"); dump_line(); return; } last_bl = NULL; if (ps.box_com || ps.last_nl) { /* if this is a boxed comment, * we dont ignore the newline */ if (s_com == e_com) *e_com++ = ' '; if (!ps.box_com && e_com - s_com > 3) { dump_line(); if (star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } dump_line(); if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } else { ps.last_nl = 1; if (*(e_com - 1) == ' ' || *(e_com - 1) == '\t') last_bl = e_com - 1; /* * if there was a space at the end of the last line, remember * where it was */ else { /* otherwise, insert one */ last_bl = e_com; CHECK_SIZE_COM; *e_com++ = ' '; } } ++line_no; /* keep track of input line number */ if (!ps.box_com) { int nstar = 1; do { /* flush any blanks and/or tabs at start of * next line */ if (++buf_ptr >= buf_end) fill_buffer(); if (*buf_ptr == '*' && --nstar >= 0) { if (++buf_ptr >= buf_end) fill_buffer(); if (*buf_ptr == '/') goto end_of_comment; } } while (*buf_ptr == ' ' || *buf_ptr == '\t'); } else if (++buf_ptr >= buf_end) fill_buffer(); break; /* end of case for newline */ case '*': /* must check for possibility of being at end * of comment */ if (++buf_ptr >= buf_end) /* get to next char after * */ fill_buffer(); if (*buf_ptr == '/') { /* it is the end!!! 
*/ end_of_comment: if (++buf_ptr >= buf_end) fill_buffer(); CHECK_SIZE_COM; if (break_delim) { if (e_com > s_com + 3) { dump_line(); } else s_com = e_com; *e_com++ = ' '; } if (e_com[-1] != ' ' && !ps.box_com) *e_com++ = ' '; /* ensure blank before end */ *e_com++ = '*', *e_com++ = '/', *e_com = '\0'; ps.just_saw_decl = l_just_saw_decl; return; } else /* handle isolated '*' */ *e_com++ = '*'; break; default: /* we have a random char */ now_col = count_spaces_until(ps.com_col, s_com, e_com); do { *e_com = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_com == ' ' || *e_com == '\t') last_bl = e_com; /* remember we saw a blank */ ++e_com; now_col++; } while (!memchr("*\n\r\b\t", *buf_ptr, 6) && (now_col <= adj_max_col || !last_bl)); ps.last_nl = false; if (now_col > adj_max_col && !ps.box_com && e_com[-1] > ' ') { /* * the comment is too long, it must be broken up */ if (last_bl == NULL) { dump_line(); if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; break; } *e_com = '\0'; e_com = last_bl; dump_line(); if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; for (t_ptr = last_bl + 1; *t_ptr == ' ' || *t_ptr == '\t'; t_ptr++) ; last_bl = NULL; while (*t_ptr != '\0') { if (*t_ptr == ' ' || *t_ptr == '\t') last_bl = e_com; *e_com++ = *t_ptr++; } } break; } } } Index: projects/clang391-import/usr.bin/sort/bwstring.c =================================================================== --- projects/clang391-import/usr.bin/sort/bwstring.c (revision 309262) +++ projects/clang391-import/usr.bin/sort/bwstring.c (revision 309263) @@ -1,1149 +1,1143 @@ /*- * Copyright (C) 2009 Gabor Kovesdan * Copyright (C) 2012 Oleg Moskalenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "bwstring.h" #include "sort.h" bool byte_sort; static wchar_t **wmonths; static unsigned char **cmonths; /* initialise months */ void initialise_months(void) { const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, ABMON_11, ABMON_12 }; unsigned char *tmp; size_t len; if (MB_CUR_MAX == 1) { if (cmonths == NULL) { unsigned char *m; cmonths = sort_malloc(sizeof(unsigned char*) * 12); for (int i = 0; i < 12; i++) { cmonths[i] = NULL; tmp = (unsigned char *) nl_langinfo(item[i]); if (debug_sort) printf("month[%d]=%s\n", i, tmp); if (*tmp == '\0') continue; m = sort_strdup(tmp); len = strlen(tmp); for (unsigned int j = 0; j < len; j++) m[j] = toupper(m[j]); cmonths[i] = m; } } } else { if (wmonths == NULL) { wchar_t *m; wmonths = sort_malloc(sizeof(wchar_t *) * 12); for (int i = 0; i < 12; i++) { wmonths[i] = NULL; tmp = (unsigned char *) nl_langinfo(item[i]); if (debug_sort) printf("month[%d]=%s\n", i, tmp); if (*tmp == '\0') continue; len = strlen(tmp); m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); if (mbstowcs(m, (char*)tmp, len) == ((size_t) - 1)) { sort_free(m); continue; } m[len] = L'\0'; for (unsigned int j = 0; j < len; j++) m[j] = towupper(m[j]); wmonths[i] = m; } } } } /* * Compare two wide-character strings */ static int wide_str_coll(const wchar_t *s1, const wchar_t *s2) { int ret = 0; errno = 0; ret = wcscoll(s1, s2); if (errno == EILSEQ) { errno = 0; ret = wcscmp(s1, s2); if (errno != 0) { for (size_t i = 0; ; ++i) { wchar_t c1 = s1[i]; wchar_t c2 = s2[i]; if (c1 == L'\0') return ((c2 == L'\0') ? 0 : -1); if (c2 == L'\0') return (+1); if (c1 == c2) continue; return ((int)(c1 - c2)); } } } return (ret); } /* counterparts of wcs functions */ void bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) { if (MB_CUR_MAX == 1) fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); else fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); } const void* bwsrawdata(const struct bwstring *bws) { return (&(bws->data)); } size_t bwsrawlen(const struct bwstring *bws) { return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len)); } size_t bws_memsize(const struct bwstring *bws) { return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring))); } void bws_setlen(struct bwstring *bws, size_t newlen) { if (bws && newlen != bws->len && newlen <= bws->len) { bws->len = newlen; if (MB_CUR_MAX == 1) bws->data.cstr[newlen] = '\0'; else bws->data.wstr[newlen] = L'\0'; } } /* * Allocate a new binary string of specified size */ struct bwstring * bwsalloc(size_t sz) { struct bwstring *ret; if (MB_CUR_MAX == 1) ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); else ret = sort_malloc(sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1)); ret->len = sz; if (MB_CUR_MAX == 1) ret->data.cstr[ret->len] = '\0'; else ret->data.wstr[ret->len] = L'\0'; return (ret); } /* * Create a copy of binary string. * New string size equals the length of the old string. */ struct bwstring * bwsdup(const struct bwstring *s) { if (s == NULL) return (NULL); else { struct bwstring *ret = bwsalloc(s->len); if (MB_CUR_MAX == 1) memcpy(ret->data.cstr, s->data.cstr, (s->len)); else memcpy(ret->data.wstr, s->data.wstr, SIZEOF_WCHAR_STRING(s->len)); return (ret); } } /* * Create a new binary string from a wide character buffer. 
*/ struct bwstring * bwssbdup(const wchar_t *str, size_t len) { if (str == NULL) return ((len == 0) ? bwsalloc(0) : NULL); else { struct bwstring *ret; ret = bwsalloc(len); if (MB_CUR_MAX == 1) for (size_t i = 0; i < len; ++i) ret->data.cstr[i] = (unsigned char) str[i]; else memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); return (ret); } } /* * Create a new binary string from a raw binary buffer. */ struct bwstring * bwscsbdup(const unsigned char *str, size_t len) { struct bwstring *ret; ret = bwsalloc(len); if (str) { if (MB_CUR_MAX == 1) memcpy(ret->data.cstr, str, len); else { mbstate_t mbs; const char *s; size_t charlen, chars, cptr; charlen = chars = 0; cptr = 0; s = (const char *) str; memset(&mbs, 0, sizeof(mbs)); while (cptr < len) { size_t n = MB_CUR_MAX; if (n > len - cptr) n = len - cptr; charlen = mbrlen(s + cptr, n, &mbs); switch (charlen) { case 0: /* FALLTHROUGH */ case (size_t) -1: /* FALLTHROUGH */ case (size_t) -2: ret->data.wstr[chars++] = (unsigned char) s[cptr]; ++cptr; break; default: n = mbrtowc(ret->data.wstr + (chars++), s + cptr, charlen, &mbs); if ((n == (size_t)-1) || (n == (size_t)-2)) /* NOTREACHED */ err(2, "mbrtowc error"); cptr += charlen; } } ret->len = chars; ret->data.wstr[ret->len] = L'\0'; } } return (ret); } /* * De-allocate object memory */ void bwsfree(const struct bwstring *s) { if (s) sort_free(s); } /* * Copy content of src binary string to dst. * If the capacity of the dst string is not sufficient, * then the data is truncated. */ size_t bwscpy(struct bwstring *dst, const struct bwstring *src) { size_t nums = src->len; if (nums > dst->len) nums = dst->len; dst->len = nums; if (MB_CUR_MAX == 1) { memcpy(dst->data.cstr, src->data.cstr, nums); dst->data.cstr[dst->len] = '\0'; } else { memcpy(dst->data.wstr, src->data.wstr, SIZEOF_WCHAR_STRING(nums + 1)); dst->data.wstr[dst->len] = L'\0'; } return (nums); } /* * Copy content of src binary string to dst, * with specified number of symbols to be copied. * If the capacity of the dst string is not sufficient, * then the data is truncated. */ struct bwstring * bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) { size_t nums = src->len; if (nums > dst->len) nums = dst->len; if (nums > size) nums = size; dst->len = nums; if (MB_CUR_MAX == 1) { memcpy(dst->data.cstr, src->data.cstr, nums); dst->data.cstr[dst->len] = '\0'; } else { memcpy(dst->data.wstr, src->data.wstr, SIZEOF_WCHAR_STRING(nums + 1)); dst->data.wstr[dst->len] = L'\0'; } return (dst); } /* * Copy content of src binary string to dst, * with specified number of symbols to be copied. * An offset value can be specified, from the start of src string. * If the capacity of the dst string is not sufficient, * then the data is truncated. */ struct bwstring * bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, size_t size) { if (offset >= src->len) { dst->data.wstr[0] = 0; dst->len = 0; } else { size_t nums = src->len - offset; if (nums > dst->len) nums = dst->len; if (nums > size) nums = size; dst->len = nums; if (MB_CUR_MAX == 1) { memcpy(dst->data.cstr, src->data.cstr + offset, (nums)); dst->data.cstr[dst->len] = '\0'; } else { memcpy(dst->data.wstr, src->data.wstr + offset, SIZEOF_WCHAR_STRING(nums)); dst->data.wstr[dst->len] = L'\0'; } } return (dst); } /* * Write binary string to the file. * The output is ended either with '\n' (nl == true) * or '\0' (nl == false). 
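 *
 * In terms of the parameter actually used below, zero_ended == false
 * selects the '\n' terminator and zero_ended == true selects '\0';
 * the "nl" flag mentioned above is the same switch.  A short sketch
 * (hypothetical one-character string):
 *
 *	struct bwstring *bws = bwscsbdup((const unsigned char *)"x", 1);
 *	bwsfwrite(bws, stdout, false);	// emits "x\n"
 *	bwsfree(bws);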
*/ size_t bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) { if (MB_CUR_MAX == 1) { size_t len = bws->len; if (!zero_ended) { bws->data.cstr[len] = '\n'; if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) err(2, NULL); bws->data.cstr[len] = '\0'; } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) err(2, NULL); return (len + 1); } else { wchar_t eols; size_t printed = 0; eols = zero_ended ? btowc('\0') : btowc('\n'); while (printed < BWSLEN(bws)) { const wchar_t *s = bws->data.wstr + printed; if (*s == L'\0') { int nums; nums = fwprintf(f, L"%lc", *s); if (nums != 1) err(2, NULL); ++printed; } else { int nums; nums = fwprintf(f, L"%ls", s); if (nums < 1) err(2, NULL); printed += nums; } } fwprintf(f, L"%lc", eols); return (printed + 1); } } /* * Allocate and read a binary string from file. * The strings are nl-ended or zero-ended, depending on the sort setting. */ struct bwstring * bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) { wint_t eols; eols = zero_ended ? btowc('\0') : btowc('\n'); if (!zero_ended && (MB_CUR_MAX > 1)) { wchar_t *ret; ret = fgetwln(f, len); if (ret == NULL) { if (!feof(f)) err(2, NULL); return (NULL); } if (*len > 0) { if (ret[*len - 1] == (wchar_t)eols) --(*len); } return (bwssbdup(ret, *len)); } else if (!zero_ended && (MB_CUR_MAX == 1)) { char *ret; ret = fgetln(f, len); if (ret == NULL) { if (!feof(f)) err(2, NULL); return (NULL); } if (*len > 0) { if (ret[*len - 1] == '\n') --(*len); } return (bwscsbdup((unsigned char*)ret, *len)); } else { *len = 0; if (feof(f)) return (NULL); if (2 >= rb->fgetwln_z_buffer_size) { rb->fgetwln_z_buffer_size += 256; rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, sizeof(wchar_t) * rb->fgetwln_z_buffer_size); } rb->fgetwln_z_buffer[*len] = 0; if (MB_CUR_MAX == 1) while (!feof(f)) { int c; c = fgetc(f); if (c == EOF) { if (*len == 0) return (NULL); goto line_read_done; } if (c == eols) goto line_read_done; if (*len + 1 >= rb->fgetwln_z_buffer_size) { rb->fgetwln_z_buffer_size += 256; rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); } rb->fgetwln_z_buffer[*len] = c; rb->fgetwln_z_buffer[++(*len)] = 0; } else while (!feof(f)) { wint_t c = 0; c = fgetwc(f); if (c == WEOF) { if (*len == 0) return (NULL); goto line_read_done; } if (c == eols) goto line_read_done; if (*len + 1 >= rb->fgetwln_z_buffer_size) { rb->fgetwln_z_buffer_size += 256; rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); } rb->fgetwln_z_buffer[*len] = c; rb->fgetwln_z_buffer[++(*len)] = 0; } line_read_done: /* we do not count the last 0 */ return (bwssbdup(rb->fgetwln_z_buffer, *len)); } } int bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset, size_t len) { size_t cmp_len, len1, len2; int res = 0; cmp_len = 0; len1 = bws1->len; len2 = bws2->len; if (len1 <= offset) { return ((len2 <= offset) ? 
0 : -1); } else { if (len2 <= offset) return (+1); else { len1 -= offset; len2 -= offset; cmp_len = len1; if (len2 < cmp_len) cmp_len = len2; if (len < cmp_len) cmp_len = len; if (MB_CUR_MAX == 1) { const unsigned char *s1, *s2; s1 = bws1->data.cstr + offset; s2 = bws2->data.cstr + offset; res = memcmp(s1, s2, cmp_len); } else { const wchar_t *s1, *s2; s1 = bws1->data.wstr + offset; s2 = bws2->data.wstr + offset; res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); } } } if (res == 0) { if (len1 < cmp_len && len1 < len2) res = -1; else if (len2 < cmp_len && len2 < len1) res = +1; } return (res); } int bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) { size_t len1, len2, cmp_len; int res; len1 = bws1->len; len2 = bws2->len; len1 -= offset; len2 -= offset; cmp_len = len1; if (len2 < cmp_len) cmp_len = len2; res = bwsncmp(bws1, bws2, offset, cmp_len); if (res == 0) { if( len1 < len2) res = -1; else if (len2 < len1) res = +1; } return (res); } int bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) { wchar_t c1, c2; size_t i = 0; for (i = 0; i < len; ++i) { c1 = bws_get_iter_value(iter1); c2 = bws_get_iter_value(iter2); if (c1 != c2) return (c1 - c2); iter1 = bws_iterator_inc(iter1, 1); iter2 = bws_iterator_inc(iter2, 1); } return (0); } int bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) { size_t len1, len2; len1 = bws1->len; len2 = bws2->len; if (len1 <= offset) return ((len2 <= offset) ? 0 : -1); else { if (len2 <= offset) return (+1); else { len1 -= offset; len2 -= offset; if (MB_CUR_MAX == 1) { const unsigned char *s1, *s2; s1 = bws1->data.cstr + offset; s2 = bws2->data.cstr + offset; if (byte_sort) { int res = 0; if (len1 > len2) { res = memcmp(s1, s2, len2); if (!res) res = +1; } else if (len1 < len2) { res = memcmp(s1, s2, len1); if (!res) res = -1; } else res = memcmp(s1, s2, len1); return (res); } else { int res = 0; size_t i, maxlen; i = 0; maxlen = len1; if (maxlen > len2) maxlen = len2; while (i < maxlen) { /* goto next non-zero part: */ while ((i < maxlen) && !s1[i] && !s2[i]) ++i; if (i >= maxlen) break; if (s1[i] == 0) { if (s2[i] == 0) /* NOTREACHED */ err(2, "bwscoll error 01"); else return (-1); } else if (s2[i] == 0) return (+1); res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); if (res) return (res); while ((i < maxlen) && s1[i] && s2[i]) ++i; if (i >= maxlen) break; if (s1[i] == 0) { if (s2[i] == 0) { ++i; continue; } else return (-1); } else if (s2[i] == 0) return (+1); else /* NOTREACHED */ err(2, "bwscoll error 02"); } if (len1 < len2) return (-1); else if (len1 > len2) return (+1); return (0); } } else { const wchar_t *s1, *s2; size_t i, maxlen; int res = 0; s1 = bws1->data.wstr + offset; s2 = bws2->data.wstr + offset; i = 0; maxlen = len1; if (maxlen > len2) maxlen = len2; while (i < maxlen) { /* goto next non-zero part: */ while ((i < maxlen) && !s1[i] && !s2[i]) ++i; if (i >= maxlen) break; if (s1[i] == 0) { if (s2[i] == 0) /* NOTREACHED */ err(2, "bwscoll error 1"); else return (-1); } else if (s2[i] == 0) return (+1); res = wide_str_coll(s1 + i, s2 + i); if (res) return (res); while ((i < maxlen) && s1[i] && s2[i]) ++i; if (i >= maxlen) break; if (s1[i] == 0) { if (s2[i] == 0) { ++i; continue; } else return (-1); } else if (s2[i] == 0) return (+1); else /* NOTREACHED */ err(2, "bwscoll error 2"); } if (len1 < len2) return (-1); else if (len1 > len2) return (+1); return (0); } } } } /* * Correction of the system API */ double bwstod(struct bwstring *s0, bool 
*empty) { double ret = 0; if (MB_CUR_MAX == 1) { unsigned char *end, *s; char *ep; s = s0->data.cstr; end = s + s0->len; ep = NULL; while (isblank(*s) && s < end) ++s; if (!isprint(*s)) { *empty = true; return (0); } ret = strtod((char*)s, &ep); if ((unsigned char*) ep == s) { *empty = true; return (0); } } else { wchar_t *end, *ep, *s; s = s0->data.wstr; end = s + s0->len; ep = NULL; while (iswblank(*s) && s < end) ++s; if (!iswprint(*s)) { *empty = true; return (0); } ret = wcstod(s, &ep); if (ep == s) { *empty = true; return (0); } } *empty = false; return (ret); } /* * A helper function for monthcoll. If a line matches * a month name, it returns (number of the month - 1), * while if there is no match, it just return -1. */ int bws_month_score(const struct bwstring *s0) { if (MB_CUR_MAX == 1) { const unsigned char *end, *s; - size_t len; s = s0->data.cstr; end = s + s0->len; while (isblank(*s) && s < end) ++s; - len = strlen((const char*)s); - for (int i = 11; i >= 0; --i) { if (cmonths[i] && (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i])))) return (i); } } else { const wchar_t *end, *s; - size_t len; s = s0->data.wstr; end = s + s0->len; while (iswblank(*s) && s < end) ++s; - - len = wcslen(s); for (int i = 11; i >= 0; --i) { if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) return (i); } } return (-1); } /* * Rips out leading blanks (-b). */ struct bwstring * ignore_leading_blanks(struct bwstring *str) { if (MB_CUR_MAX == 1) { unsigned char *dst, *end, *src; src = str->data.cstr; dst = src; end = src + str->len; while (src < end && isblank(*src)) ++src; if (src != dst) { size_t newlen; newlen = BWSLEN(str) - (src - dst); while (src < end) { *dst = *src; ++dst; ++src; } bws_setlen(str, newlen); } } else { wchar_t *dst, *end, *src; src = str->data.wstr; dst = src; end = src + str->len; while (src < end && iswblank(*src)) ++src; if (src != dst) { size_t newlen = BWSLEN(str) - (src - dst); while (src < end) { *dst = *src; ++dst; ++src; } bws_setlen(str, newlen); } } return (str); } /* * Rips out nonprinting characters (-i). */ struct bwstring * ignore_nonprinting(struct bwstring *str) { size_t newlen = str->len; if (MB_CUR_MAX == 1) { unsigned char *dst, *end, *src; unsigned char c; src = str->data.cstr; dst = src; end = src + str->len; while (src < end) { c = *src; if (isprint(c)) { *dst = c; ++dst; ++src; } else { ++src; --newlen; } } } else { wchar_t *dst, *end, *src; wchar_t c; src = str->data.wstr; dst = src; end = src + str->len; while (src < end) { c = *src; if (iswprint(c)) { *dst = c; ++dst; ++src; } else { ++src; --newlen; } } } bws_setlen(str, newlen); return (str); } /* * Rips out any characters that are not alphanumeric characters * nor blanks (-d). */ struct bwstring * dictionary_order(struct bwstring *str) { size_t newlen = str->len; if (MB_CUR_MAX == 1) { unsigned char *dst, *end, *src; unsigned char c; src = str->data.cstr; dst = src; end = src + str->len; while (src < end) { c = *src; if (isalnum(c) || isblank(c)) { *dst = c; ++dst; ++src; } else { ++src; --newlen; } } } else { wchar_t *dst, *end, *src; wchar_t c; src = str->data.wstr; dst = src; end = src + str->len; while (src < end) { c = *src; if (iswalnum(c) || iswblank(c)) { *dst = c; ++dst; ++src; } else { ++src; --newlen; } } } bws_setlen(str, newlen); return (str); } /* * Converts string to lower case(-f). 
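 *
 * Note that, despite the summary above, the code below folds
 * characters to UPPER case via toupper(3)/towupper(3); for -f the
 * direction of the fold is irrelevant, as long as both compared
 * strings are folded the same way.  A single-byte sketch of the same
 * fold (hypothetical buffer):
 *
 *	for (unsigned char *p = buf; p < buf + len; p++)
 *		*p = toupper(*p);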
*/ struct bwstring * ignore_case(struct bwstring *str) { if (MB_CUR_MAX == 1) { unsigned char *end, *s; s = str->data.cstr; end = s + str->len; while (s < end) { *s = toupper(*s); ++s; } } else { wchar_t *end, *s; s = str->data.wstr; end = s + str->len; while (s < end) { *s = towupper(*s); ++s; } } return (str); } void bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) { if (MB_CUR_MAX == 1) warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); else warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); } Index: projects/clang391-import/usr.bin/sort/sort.c =================================================================== --- projects/clang391-import/usr.bin/sort/sort.c (revision 309262) +++ projects/clang391-import/usr.bin/sort/sort.c (revision 309263) @@ -1,1321 +1,1315 @@ /*- * Copyright (C) 2009 Gabor Kovesdan * Copyright (C) 2012 Oleg Moskalenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "coll.h" #include "file.h" #include "sort.h" #ifndef WITHOUT_NLS #include nl_catd catalog; #endif #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) static bool need_random; static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; static const void *random_seed; static size_t random_seed_size; MD5_CTX md5_ctx; /* * Default messages to use when NLS is disabled or no catalogue * is found. */ const char *nlsstr[] = { "", /* 1*/"mutually exclusive flags", /* 2*/"extra argument not allowed with -c", /* 3*/"Unknown feature", /* 4*/"Wrong memory buffer specification", /* 5*/"0 field in key specs", /* 6*/"0 column in key specs", /* 7*/"Wrong file mode", /* 8*/"Cannot open file for reading", /* 9*/"Radix sort cannot be used with these sort options", /*10*/"The chosen sort method cannot be used with stable and/or unique sort", /*11*/"Invalid key position", /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... 
] " "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " "[-o outfile] [--batch-size size] [--files0-from file] " "[--heapsort] [--mergesort] [--radixsort] [--qsort] " "[--mmap] " #if defined(SORT_THREADS) "[--parallel thread_no] " #endif "[--human-numeric-sort] " "[--version-sort] [--random-sort [--random-source file]] " "[--compress-program program] [file ...]\n" }; struct sort_opts sort_opts_vals; bool debug_sort; bool need_hint; #if defined(SORT_THREADS) unsigned int ncpu = 1; size_t nthreads = 1; #endif static bool gnusort_numeric_compatibility; static struct sort_mods default_sort_mods_object; struct sort_mods * const default_sort_mods = &default_sort_mods_object; static bool print_symbols_on_debug; /* * Arguments from file (when file0-from option is used: */ static size_t argc_from_file0 = (size_t)-1; static char **argv_from_file0; /* * Placeholder symbols for options which have no single-character equivalent */ enum { SORT_OPT = CHAR_MAX + 1, HELP_OPT, FF_OPT, BS_OPT, VERSION_OPT, DEBUG_OPT, #if defined(SORT_THREADS) PARALLEL_OPT, #endif RANDOMSOURCE_OPT, COMPRESSPROGRAM_OPT, QSORT_OPT, MERGESORT_OPT, HEAPSORT_OPT, RADIXSORT_OPT, MMAP_OPT }; #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; static struct option long_options[] = { { "batch-size", required_argument, NULL, BS_OPT }, { "buffer-size", required_argument, NULL, 'S' }, { "check", optional_argument, NULL, 'c' }, { "check=silent|quiet", optional_argument, NULL, 'C' }, { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, { "debug", no_argument, NULL, DEBUG_OPT }, { "dictionary-order", no_argument, NULL, 'd' }, { "field-separator", required_argument, NULL, 't' }, { "files0-from", required_argument, NULL, FF_OPT }, { "general-numeric-sort", no_argument, NULL, 'g' }, { "heapsort", no_argument, NULL, HEAPSORT_OPT }, { "help",no_argument, NULL, HELP_OPT }, { "human-numeric-sort", no_argument, NULL, 'h' }, { "ignore-leading-blanks", no_argument, NULL, 'b' }, { "ignore-case", no_argument, NULL, 'f' }, { "ignore-nonprinting", no_argument, NULL, 'i' }, { "key", required_argument, NULL, 'k' }, { "merge", no_argument, NULL, 'm' }, { "mergesort", no_argument, NULL, MERGESORT_OPT }, { "mmap", no_argument, NULL, MMAP_OPT }, { "month-sort", no_argument, NULL, 'M' }, { "numeric-sort", no_argument, NULL, 'n' }, { "output", required_argument, NULL, 'o' }, #if defined(SORT_THREADS) { "parallel", required_argument, NULL, PARALLEL_OPT }, #endif { "qsort", no_argument, NULL, QSORT_OPT }, { "radixsort", no_argument, NULL, RADIXSORT_OPT }, { "random-sort", no_argument, NULL, 'R' }, { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, { "reverse", no_argument, NULL, 'r' }, { "sort", required_argument, NULL, SORT_OPT }, { "stable", no_argument, NULL, 's' }, { "temporary-directory",required_argument, NULL, 'T' }, { "unique", no_argument, NULL, 'u' }, { "version", no_argument, NULL, VERSION_OPT }, { "version-sort",no_argument, NULL, 'V' }, { "zero-terminated", no_argument, NULL, 'z' }, { NULL, no_argument, NULL, 0 } }; void fix_obsolete_keys(int *argc, char **argv); /* * Check where sort modifier is present */ static bool sort_modifier_empty(struct sort_mods *sm) { if (sm == NULL) return (true); return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); } /* * Print out usage text. 
*/ static void usage(bool opt_err) { - struct option *o; FILE *out; - out = stdout; - o = &(long_options[0]); + out = opt_err ? stderr : stdout; - if (opt_err) - out = stderr; fprintf(out, getstr(12), getprogname()); if (opt_err) exit(2); exit(0); } /* * Read input file names from a file (file0-from option). */ static void read_fns_from_file0(const char *fn) { FILE *f; char *line = NULL; size_t linesize = 0; ssize_t linelen; if (fn == NULL) return; f = fopen(fn, "r"); if (f == NULL) err(2, "%s", fn); while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { if (*line != '\0') { if (argc_from_file0 == (size_t) - 1) argc_from_file0 = 0; ++argc_from_file0; argv_from_file0 = sort_realloc(argv_from_file0, argc_from_file0 * sizeof(char *)); if (argv_from_file0 == NULL) err(2, NULL); argv_from_file0[argc_from_file0 - 1] = line; } else { free(line); } line = NULL; linesize = 0; } if (ferror(f)) err(2, "%s: getdelim", fn); closefile(f, fn); } /* * Check how much RAM is available for the sort. */ static void set_hw_params(void) { long pages, psize; - pages = psize = 0; - #if defined(SORT_THREADS) ncpu = 1; #endif pages = sysconf(_SC_PHYS_PAGES); if (pages < 1) { perror("sysconf pages"); - psize = 1; + pages = 1; } psize = sysconf(_SC_PAGESIZE); if (psize < 1) { perror("sysconf psize"); psize = 4096; } #if defined(SORT_THREADS) ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); if (ncpu < 1) ncpu = 1; else if(ncpu > 32) ncpu = 32; nthreads = ncpu; #endif free_memory = (unsigned long long) pages * (unsigned long long) psize; available_free_memory = free_memory / 2; if (available_free_memory < 1024) available_free_memory = 1024; } /* * Convert "plain" symbol to wide symbol, with default value. */ static void conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) { if (wc && c) { int res; res = mbtowc(wc, c, MB_CUR_MAX); if (res < 1) *wc = def; } } /* * Set current locale symbols. */ static void set_locale(void) { struct lconv *lc; const char *locale; setlocale(LC_ALL, ""); lc = localeconv(); if (lc) { /* obtain LC_NUMERIC info */ /* Convert to wide char form */ conv_mbtowc(&symbol_decimal_point, lc->decimal_point, symbol_decimal_point); conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, symbol_thousands_sep); conv_mbtowc(&symbol_positive_sign, lc->positive_sign, symbol_positive_sign); conv_mbtowc(&symbol_negative_sign, lc->negative_sign, symbol_negative_sign); } if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) gnusort_numeric_compatibility = true; locale = setlocale(LC_COLLATE, NULL); if (locale) { char *tmpl; const char *cclocale; tmpl = sort_strdup(locale); cclocale = setlocale(LC_COLLATE, "C"); if (cclocale && !strcmp(cclocale, tmpl)) byte_sort = true; else { const char *pclocale; pclocale = setlocale(LC_COLLATE, "POSIX"); if (pclocale && !strcmp(pclocale, tmpl)) byte_sort = true; } setlocale(LC_COLLATE, tmpl); sort_free(tmpl); } } /* * Set directory temporary files. */ static void set_tmpdir(void) { char *td; td = getenv("TMPDIR"); if (td != NULL) tmpdir = sort_strdup(td); } /* * Parse -S option. 
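 *
 * The size suffixes below cascade through fall-through cases, each
 * level multiplying by 1024; a bare number (or a "K" suffix) gets one
 * factor of 1024, while "b" means plain bytes.  Worked examples:
 *
 *	"2"	-> 2 * 1024		= 2048
 *	"2M"	-> 2 * 1024 * 1024	= 2097152
 *	"2b"	-> 2
 *	"50%"	-> available_free_memory * 50 / 100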
*/ static unsigned long long parse_memory_buffer_value(const char *value) { if (value == NULL) return (available_free_memory); else { char *endptr; unsigned long long membuf; endptr = NULL; errno = 0; membuf = strtoll(value, &endptr, 10); if (errno != 0) { warn("%s",getstr(4)); membuf = available_free_memory; } else { switch (*endptr){ case 'Y': membuf *= 1024; /* FALLTHROUGH */ case 'Z': membuf *= 1024; /* FALLTHROUGH */ case 'E': membuf *= 1024; /* FALLTHROUGH */ case 'P': membuf *= 1024; /* FALLTHROUGH */ case 'T': membuf *= 1024; /* FALLTHROUGH */ case 'G': membuf *= 1024; /* FALLTHROUGH */ case 'M': membuf *= 1024; /* FALLTHROUGH */ case '\0': case 'K': membuf *= 1024; /* FALLTHROUGH */ case 'b': break; case '%': membuf = (available_free_memory * membuf) / 100; break; default: warnc(EINVAL, "%s", optarg); membuf = available_free_memory; } } return (membuf); } } /* * Signal handler that clears the temporary files. */ static void sig_handler(int sig __unused, siginfo_t *siginfo __unused, void *context __unused) { clear_tmp_files(); exit(-1); } /* * Set signal handler on panic signals. */ static void set_signal_handler(void) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = &sig_handler; sa.sa_flags = SA_SIGINFO; if (sigaction(SIGTERM, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGHUP, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGINT, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGQUIT, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGABRT, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGBUS, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGSEGV, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGUSR1, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGUSR2, &sa, NULL) < 0) { perror("sigaction"); return; } } /* * Print "unknown" message and exit with status 2. */ static void unknown(const char *what) { errx(2, "%s: %s", getstr(3), what); } /* * Check whether contradictory input options are used. */ static void check_mutually_exclusive_flags(char c, bool *mef_flags) { int fo_index, mec; bool found_others, found_this; found_others = found_this = false; fo_index = 0; for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { mec = mutually_exclusive_flags[i]; if (mec != c) { if (mef_flags[i]) { if (found_this) errx(1, "%c:%c: %s", c, mec, getstr(1)); found_others = true; fo_index = i; } } else { if (found_others) errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); mef_flags[i] = true; found_this = true; } } } /* * Initialise sort opts data. */ static void set_sort_opts(void) { memset(&default_sort_mods_object, 0, sizeof(default_sort_mods_object)); memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); default_sort_mods_object.func = get_sort_func(&default_sort_mods_object); } /* * Set a sort modifier on a sort modifiers object. 
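 *
 * Each recognised letter sets one flag (some also set need_hint or
 * need_random), marks the sort as complex, and refreshes sm->func via
 * get_sort_func(); an unrecognised letter returns false so that both
 * the option parser and the -k field parser can reject it.  E.g. for
 * the key spec "2n", parse_pos() ends up calling:
 *
 *	set_sort_modifier(&ks->sm, 'n');	// sets nflag = true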
*/ static bool set_sort_modifier(struct sort_mods *sm, int c) { if (sm) { switch (c){ case 'b': sm->bflag = true; break; case 'd': sm->dflag = true; break; case 'f': sm->fflag = true; break; case 'g': sm->gflag = true; need_hint = true; break; case 'i': sm->iflag = true; break; case 'R': sm->Rflag = true; need_random = true; break; case 'M': initialise_months(); sm->Mflag = true; need_hint = true; break; case 'n': sm->nflag = true; need_hint = true; print_symbols_on_debug = true; break; case 'r': sm->rflag = true; break; case 'V': sm->Vflag = true; break; case 'h': sm->hflag = true; need_hint = true; print_symbols_on_debug = true; break; default: return false; } sort_opts_vals.complex_sort = true; sm->func = get_sort_func(sm); } return (true); } /* * Parse POS in -k option. */ static int parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) { regmatch_t pmatch[4]; regex_t re; char *c, *f; const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; size_t len, nmatch; int ret; ret = -1; nmatch = 4; c = f = NULL; if (regcomp(&re, sregexp, REG_EXTENDED) != 0) return (-1); if (regexec(&re, s, nmatch, pmatch, 0) != 0) goto end; if (pmatch[0].rm_eo <= pmatch[0].rm_so) goto end; if (pmatch[1].rm_eo <= pmatch[1].rm_so) goto end; len = pmatch[1].rm_eo - pmatch[1].rm_so; f = sort_malloc((len + 1) * sizeof(char)); strncpy(f, s + pmatch[1].rm_so, len); f[len] = '\0'; if (second) { errno = 0; ks->f2 = (size_t) strtoul(f, NULL, 10); if (errno != 0) err(2, "-k"); if (ks->f2 == 0) { warn("%s",getstr(5)); goto end; } } else { errno = 0; ks->f1 = (size_t) strtoul(f, NULL, 10); if (errno != 0) err(2, "-k"); if (ks->f1 == 0) { warn("%s",getstr(5)); goto end; } } if (pmatch[2].rm_eo > pmatch[2].rm_so) { len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; c = sort_malloc((len + 1) * sizeof(char)); strncpy(c, s + pmatch[2].rm_so + 1, len); c[len] = '\0'; if (second) { errno = 0; ks->c2 = (size_t) strtoul(c, NULL, 10); if (errno != 0) err(2, "-k"); } else { errno = 0; ks->c1 = (size_t) strtoul(c, NULL, 10); if (errno != 0) err(2, "-k"); if (ks->c1 == 0) { warn("%s",getstr(6)); goto end; } } } else { if (second) ks->c2 = 0; else ks->c1 = 1; } if (pmatch[3].rm_eo > pmatch[3].rm_so) { regoff_t i = 0; for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { check_mutually_exclusive_flags(s[i], mef_flags); if (s[i] == 'b') { if (second) ks->pos2b = true; else ks->pos1b = true; } else if (!set_sort_modifier(&(ks->sm), s[i])) goto end; } } ret = 0; end: if (c) sort_free(c); if (f) sort_free(f); regfree(&re); return (ret); } /* * Parse -k option value. */ static int parse_k(const char *s, struct key_specs *ks) { int ret = -1; bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { false, false, false, false, false, false }; if (s && *s) { char *sptr; sptr = strchr(s, ','); if (sptr) { size_t size1; char *pos1, *pos2; size1 = sptr - s; if (size1 < 1) return (-1); pos1 = sort_malloc((size1 + 1) * sizeof(char)); strncpy(pos1, s, size1); pos1[size1] = '\0'; ret = parse_pos(pos1, ks, mef_flags, false); sort_free(pos1); if (ret < 0) return (ret); pos2 = sort_strdup(sptr + 1); ret = parse_pos(pos2, ks, mef_flags, true); sort_free(pos2); } else ret = parse_pos(s, ks, mef_flags, false); } return (ret); } /* * Parse POS in +POS -POS option. 
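 *
 * POS here is "F[.C][OPTS]" with zero-based field F and column C;
 * e.g. "2.1M" parses to *nf = 2, *nc = 1, sopts = "M".
 * fix_obsolete_keys() below then shifts these to the one-based -k
 * numbering, so the pair "+2.1 -3" is rewritten as "-k3.2,3.0".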
*/ static int parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) { regex_t re; regmatch_t pmatch[4]; char *c, *f; const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; int ret; size_t len, nmatch; ret = -1; nmatch = 4; c = f = NULL; *nc = *nf = 0; if (regcomp(&re, sregexp, REG_EXTENDED) != 0) return (-1); if (regexec(&re, s, nmatch, pmatch, 0) != 0) goto end; if (pmatch[0].rm_eo <= pmatch[0].rm_so) goto end; if (pmatch[1].rm_eo <= pmatch[1].rm_so) goto end; len = pmatch[1].rm_eo - pmatch[1].rm_so; f = sort_malloc((len + 1) * sizeof(char)); strncpy(f, s + pmatch[1].rm_so, len); f[len] = '\0'; errno = 0; *nf = (size_t) strtoul(f, NULL, 10); if (errno != 0) errx(2, "%s", getstr(11)); if (pmatch[2].rm_eo > pmatch[2].rm_so) { len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; c = sort_malloc((len + 1) * sizeof(char)); strncpy(c, s + pmatch[2].rm_so + 1, len); c[len] = '\0'; errno = 0; *nc = (size_t) strtoul(c, NULL, 10); if (errno != 0) errx(2, "%s", getstr(11)); } if (pmatch[3].rm_eo > pmatch[3].rm_so) { len = pmatch[3].rm_eo - pmatch[3].rm_so; strncpy(sopts, s + pmatch[3].rm_so, len); sopts[len] = '\0'; } ret = 0; end: if (c) sort_free(c); if (f) sort_free(f); regfree(&re); return (ret); } /* * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax */ void fix_obsolete_keys(int *argc, char **argv) { char sopt[129]; for (int i = 1; i < *argc; i++) { char *arg1; arg1 = argv[i]; if (strlen(arg1) > 1 && arg1[0] == '+') { int c1, f1; char sopts1[128]; sopts1[0] = 0; c1 = f1 = 0; if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) continue; else { f1 += 1; c1 += 1; if (i + 1 < *argc) { char *arg2 = argv[i + 1]; if (strlen(arg2) > 1 && arg2[0] == '-') { int c2, f2; char sopts2[128]; sopts2[0] = 0; c2 = f2 = 0; if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) { if (c2 > 0) f2 += 1; sprintf(sopt, "-k%d.%d%s,%d.%d%s", f1, c1, sopts1, f2, c2, sopts2); argv[i] = sort_strdup(sopt); for (int j = i + 1; j + 1 < *argc; j++) argv[j] = argv[j + 1]; *argc -= 1; continue; } } } sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); argv[i] = sort_strdup(sopt); } } } } /* * Set random seed */ static void set_random_seed(void) { if (need_random) { if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { FILE* fseed; MD5_CTX ctx; char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; size_t sz = 0; fseed = openfile(random_source, "r"); while (!feof(fseed)) { int cr; cr = fgetc(fseed); if (cr == EOF) break; rsd[sz++] = (char) cr; if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) break; } closefile(fseed, random_source); MD5Init(&ctx); MD5Update(&ctx, rsd, sz); random_seed = MD5End(&ctx, NULL); random_seed_size = strlen(random_seed); } else { MD5_CTX ctx; char *b; MD5Init(&ctx); b = MD5File(random_source, NULL); if (b == NULL) err(2, NULL); random_seed = b; random_seed_size = strlen(b); } MD5Init(&md5_ctx); if(random_seed_size>0) { MD5Update(&md5_ctx, random_seed, random_seed_size); } } } /* * Main function. 
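 *
 * Rough flow of main(): initialise signal handlers, hardware
 * parameters, locale, tmpdir and default sort options; rewrite
 * obsolete +POS/-POS keys; parse options; fill in default key specs;
 * then either sort the inputs (procfile() followed by a merge of any
 * temporary files), check ordering (-c/-C), or merge pre-sorted files
 * (-m).  A typical invocation (hypothetical file names):
 *
 *	$ sort -t: -k2n -o out.txt in.txt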
*/ int main(int argc, char **argv) { char *outfile, *real_outfile; int c, result; bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { false, false, false, false, false, false }; result = 0; outfile = sort_strdup("-"); real_outfile = NULL; struct sort_mods *sm = &default_sort_mods_object; init_tmp_files(); set_signal_handler(); set_hw_params(); set_locale(); set_tmpdir(); set_sort_opts(); fix_obsolete_keys(&argc, argv); while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) != -1)) { check_mutually_exclusive_flags(c, mef_flags); if (!set_sort_modifier(sm, c)) { switch (c) { case 'c': sort_opts_vals.cflag = true; if (optarg) { if (!strcmp(optarg, "diagnose-first")) ; else if (!strcmp(optarg, "silent") || !strcmp(optarg, "quiet")) sort_opts_vals.csilentflag = true; else if (*optarg) unknown(optarg); } break; case 'C': sort_opts_vals.cflag = true; sort_opts_vals.csilentflag = true; break; case 'k': { sort_opts_vals.complex_sort = true; sort_opts_vals.kflag = true; keys_num++; keys = sort_realloc(keys, keys_num * sizeof(struct key_specs)); memset(&(keys[keys_num - 1]), 0, sizeof(struct key_specs)); if (parse_k(optarg, &(keys[keys_num - 1])) < 0) { errc(2, EINVAL, "-k %s", optarg); } break; } case 'm': sort_opts_vals.mflag = true; break; case 'o': outfile = sort_realloc(outfile, (strlen(optarg) + 1)); strcpy(outfile, optarg); break; case 's': sort_opts_vals.sflag = true; break; case 'S': available_free_memory = parse_memory_buffer_value(optarg); break; case 'T': tmpdir = sort_strdup(optarg); break; case 't': while (strlen(optarg) > 1) { if (optarg[0] != '\\') { errc(2, EINVAL, "%s", optarg); } optarg += 1; if (*optarg == '0') { *optarg = 0; break; } } sort_opts_vals.tflag = true; sort_opts_vals.field_sep = btowc(optarg[0]); if (sort_opts_vals.field_sep == WEOF) { errno = EINVAL; err(2, NULL); } if (!gnusort_numeric_compatibility) { if (symbol_decimal_point == sort_opts_vals.field_sep) symbol_decimal_point = WEOF; if (symbol_thousands_sep == sort_opts_vals.field_sep) symbol_thousands_sep = WEOF; if (symbol_negative_sign == sort_opts_vals.field_sep) symbol_negative_sign = WEOF; if (symbol_positive_sign == sort_opts_vals.field_sep) symbol_positive_sign = WEOF; } break; case 'u': sort_opts_vals.uflag = true; /* stable sort for the correct unique val */ sort_opts_vals.sflag = true; break; case 'z': sort_opts_vals.zflag = true; break; case SORT_OPT: if (optarg) { if (!strcmp(optarg, "general-numeric")) set_sort_modifier(sm, 'g'); else if (!strcmp(optarg, "human-numeric")) set_sort_modifier(sm, 'h'); else if (!strcmp(optarg, "numeric")) set_sort_modifier(sm, 'n'); else if (!strcmp(optarg, "month")) set_sort_modifier(sm, 'M'); else if (!strcmp(optarg, "random")) set_sort_modifier(sm, 'R'); else unknown(optarg); } break; #if defined(SORT_THREADS) case PARALLEL_OPT: nthreads = (size_t)(atoi(optarg)); if (nthreads < 1) nthreads = 1; if (nthreads > 1024) nthreads = 1024; break; #endif case QSORT_OPT: sort_opts_vals.sort_method = SORT_QSORT; break; case MERGESORT_OPT: sort_opts_vals.sort_method = SORT_MERGESORT; break; case MMAP_OPT: use_mmap = true; break; case HEAPSORT_OPT: sort_opts_vals.sort_method = SORT_HEAPSORT; break; case RADIXSORT_OPT: sort_opts_vals.sort_method = SORT_RADIXSORT; break; case RANDOMSOURCE_OPT: random_source = strdup(optarg); break; case COMPRESSPROGRAM_OPT: compress_program = strdup(optarg); break; case FF_OPT: read_fns_from_file0(optarg); break; case BS_OPT: { errno = 0; long mof = strtol(optarg, NULL, 10); if (errno != 0) err(2, "--batch-size"); if (mof >= 2) 
max_open_files = (size_t) mof + 1; } break; case VERSION_OPT: printf("%s\n", VERSION); exit(EXIT_SUCCESS); /* NOTREACHED */ break; case DEBUG_OPT: debug_sort = true; break; case HELP_OPT: usage(false); /* NOTREACHED */ break; default: usage(true); /* NOTREACHED */ } } } argc -= optind; argv += optind; if (argv_from_file0) { argc = argc_from_file0; argv = argv_from_file0; } #ifndef WITHOUT_NLS catalog = catopen("sort", NL_CAT_LOCALE); #endif if (sort_opts_vals.cflag && sort_opts_vals.mflag) errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); #ifndef WITHOUT_NLS catclose(catalog); #endif if (keys_num == 0) { keys_num = 1; keys = sort_realloc(keys, sizeof(struct key_specs)); memset(&(keys[0]), 0, sizeof(struct key_specs)); keys[0].c1 = 1; keys[0].pos1b = default_sort_mods->bflag; keys[0].pos2b = default_sort_mods->bflag; memcpy(&(keys[0].sm), default_sort_mods, sizeof(struct sort_mods)); } for (size_t i = 0; i < keys_num; i++) { struct key_specs *ks; ks = &(keys[i]); if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && !(ks->pos2b)) { ks->pos1b = sm->bflag; ks->pos2b = sm->bflag; memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); } ks->sm.func = get_sort_func(&(ks->sm)); } if (debug_sort) { printf("Memory to be used for sorting: %llu\n",available_free_memory); #if defined(SORT_THREADS) printf("Number of CPUs: %d\n",(int)ncpu); nthreads = 1; #endif printf("Using collate rules of %s locale\n", setlocale(LC_COLLATE, NULL)); if (byte_sort) printf("Byte sort is used\n"); if (print_symbols_on_debug) { printf("Decimal Point: <%lc>\n", symbol_decimal_point); if (symbol_thousands_sep) printf("Thousands separator: <%lc>\n", symbol_thousands_sep); printf("Positive sign: <%lc>\n", symbol_positive_sign); printf("Negative sign: <%lc>\n", symbol_negative_sign); } } set_random_seed(); /* Case when the outfile equals one of the input files: */ if (strcmp(outfile, "-")) { for(int i = 0; i < argc; ++i) { if (strcmp(argv[i], outfile) == 0) { real_outfile = sort_strdup(outfile); for(;;) { char* tmp = sort_malloc(strlen(outfile) + strlen(".tmp") + 1); strcpy(tmp, outfile); strcpy(tmp + strlen(tmp), ".tmp"); sort_free(outfile); outfile = tmp; if (access(outfile, F_OK) < 0) break; } tmp_file_atexit(outfile); } } } #if defined(SORT_THREADS) if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) nthreads = 1; #endif if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { struct file_list fl; struct sort_list list; sort_list_init(&list); file_list_init(&fl, true); if (argc < 1) procfile("-", &list, &fl); else { while (argc > 0) { procfile(*argv, &list, &fl); --argc; ++argv; } } if (fl.count < 1) sort_list_to_file(&list, outfile); else { if (list.count > 0) { char *flast = new_tmp_file_name(); sort_list_to_file(&list, flast); file_list_add(&fl, flast, false); } merge_files(&fl, outfile); } file_list_clean(&fl); /* * We are about to exit the program, so we can ignore * the clean-up for speed * * sort_list_clean(&list); */ } else if (sort_opts_vals.cflag) { result = (argc == 0) ? 
(check("-")) : (check(*argv)); } else if (sort_opts_vals.mflag) { struct file_list fl; file_list_init(&fl, false); file_list_populate(&fl, argc, argv, true); merge_files(&fl, outfile); file_list_clean(&fl); } if (real_outfile) { unlink(real_outfile); if (rename(outfile, real_outfile) < 0) err(2, NULL); sort_free(real_outfile); } sort_free(outfile); return (result); } Index: projects/clang391-import/usr.sbin/ctld/ctld.c =================================================================== --- projects/clang391-import/usr.sbin/ctld/ctld.c (revision 309262) +++ projects/clang391-import/usr.sbin/ctld/ctld.c (revision 309263) @@ -1,2726 +1,2729 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ctld.h" #include "isns.h" bool proxy_mode = false; static volatile bool sighup_received = false; static volatile bool sigterm_received = false; static volatile bool sigalrm_received = false; static int nchildren = 0; static uint16_t last_portal_group_tag = 0xff; static void usage(void) { fprintf(stderr, "usage: ctld [-d][-u][-f config-file]\n"); exit(1); } char * checked_strdup(const char *s) { char *c; c = strdup(s); if (c == NULL) log_err(1, "strdup"); return (c); } struct conf * conf_new(void) { struct conf *conf; conf = calloc(1, sizeof(*conf)); if (conf == NULL) log_err(1, "calloc"); TAILQ_INIT(&conf->conf_luns); TAILQ_INIT(&conf->conf_targets); TAILQ_INIT(&conf->conf_auth_groups); TAILQ_INIT(&conf->conf_ports); TAILQ_INIT(&conf->conf_portal_groups); TAILQ_INIT(&conf->conf_pports); TAILQ_INIT(&conf->conf_isns); conf->conf_isns_period = 900; conf->conf_isns_timeout = 5; conf->conf_debug = 0; conf->conf_timeout = 60; conf->conf_maxproc = 30; return (conf); } void conf_delete(struct conf *conf) { struct lun *lun, *ltmp; struct target *targ, *tmp; struct auth_group *ag, *cagtmp; struct portal_group *pg, *cpgtmp; struct pport *pp, *pptmp; struct isns *is, *istmp; assert(conf->conf_pidfh == NULL); TAILQ_FOREACH_SAFE(lun, &conf->conf_luns, l_next, ltmp) lun_delete(lun); TAILQ_FOREACH_SAFE(targ, &conf->conf_targets, t_next, tmp) target_delete(targ); TAILQ_FOREACH_SAFE(ag, &conf->conf_auth_groups, ag_next, cagtmp) auth_group_delete(ag); TAILQ_FOREACH_SAFE(pg, &conf->conf_portal_groups, pg_next, cpgtmp) portal_group_delete(pg); TAILQ_FOREACH_SAFE(pp, &conf->conf_pports, pp_next, pptmp) pport_delete(pp); TAILQ_FOREACH_SAFE(is, &conf->conf_isns, i_next, istmp) isns_delete(is); assert(TAILQ_EMPTY(&conf->conf_ports)); free(conf->conf_pidfile_path); free(conf); } static struct auth * auth_new(struct auth_group *ag) { struct auth *auth; auth = calloc(1, sizeof(*auth)); if (auth == NULL) log_err(1, "calloc"); auth->a_auth_group = ag; TAILQ_INSERT_TAIL(&ag->ag_auths, auth, a_next); return (auth); } static void auth_delete(struct auth *auth) { TAILQ_REMOVE(&auth->a_auth_group->ag_auths, auth, a_next); free(auth->a_user); free(auth->a_secret); free(auth->a_mutual_user); free(auth->a_mutual_secret); free(auth); } const struct auth * auth_find(const struct auth_group *ag, const char *user) { const struct auth *auth; TAILQ_FOREACH(auth, &ag->ag_auths, a_next) { if (strcmp(auth->a_user, user) == 0) return (auth); } return (NULL); } static void auth_check_secret_length(struct auth *auth) { size_t len; len = strlen(auth->a_secret); if (len > 16) { if (auth->a_auth_group->ag_name != NULL) log_warnx("secret for user \"%s\", auth-group \"%s\", " "is too long; it should be at most 16 characters " "long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("secret for user \"%s\", target \"%s\", " "is too long; it should be at most 16 characters " "long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if (len < 12) { if (auth->a_auth_group->ag_name != NULL) log_warnx("secret for user \"%s\", auth-group \"%s\", " "is too short; it should be at least 12 characters " "long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("secret for user \"%s\", target \"%s\", " "is too short; it should be at least 12 characters " "long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if 
(auth->a_mutual_secret != NULL) { len = strlen(auth->a_mutual_secret); if (len > 16) { if (auth->a_auth_group->ag_name != NULL) log_warnx("mutual secret for user \"%s\", " "auth-group \"%s\", is too long; it should " "be at most 16 characters long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("mutual secret for user \"%s\", " "target \"%s\", is too long; it should " "be at most 16 characters long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if (len < 12) { if (auth->a_auth_group->ag_name != NULL) log_warnx("mutual secret for user \"%s\", " "auth-group \"%s\", is too short; it " "should be at least 12 characters long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("mutual secret for user \"%s\", " "target \"%s\", is too short; it should be " "at least 12 characters long", auth->a_user, auth->a_auth_group->ag_target->t_name); } } } const struct auth * auth_new_chap(struct auth_group *ag, const char *user, const char *secret) { struct auth *auth; if (ag->ag_type == AG_TYPE_UNKNOWN) ag->ag_type = AG_TYPE_CHAP; if (ag->ag_type != AG_TYPE_CHAP) { if (ag->ag_name != NULL) log_warnx("cannot mix \"chap\" authentication with " "other types for auth-group \"%s\"", ag->ag_name); else log_warnx("cannot mix \"chap\" authentication with " "other types for target \"%s\"", ag->ag_target->t_name); return (NULL); } auth = auth_new(ag); auth->a_user = checked_strdup(user); auth->a_secret = checked_strdup(secret); auth_check_secret_length(auth); return (auth); } const struct auth * auth_new_chap_mutual(struct auth_group *ag, const char *user, const char *secret, const char *user2, const char *secret2) { struct auth *auth; if (ag->ag_type == AG_TYPE_UNKNOWN) ag->ag_type = AG_TYPE_CHAP_MUTUAL; if (ag->ag_type != AG_TYPE_CHAP_MUTUAL) { if (ag->ag_name != NULL) log_warnx("cannot mix \"chap-mutual\" authentication " "with other types for auth-group \"%s\"", ag->ag_name); else log_warnx("cannot mix \"chap-mutual\" authentication " "with other types for target \"%s\"", ag->ag_target->t_name); return (NULL); } auth = auth_new(ag); auth->a_user = checked_strdup(user); auth->a_secret = checked_strdup(secret); auth->a_mutual_user = checked_strdup(user2); auth->a_mutual_secret = checked_strdup(secret2); auth_check_secret_length(auth); return (auth); } const struct auth_name * auth_name_new(struct auth_group *ag, const char *name) { struct auth_name *an; an = calloc(1, sizeof(*an)); if (an == NULL) log_err(1, "calloc"); an->an_auth_group = ag; an->an_initator_name = checked_strdup(name); TAILQ_INSERT_TAIL(&ag->ag_names, an, an_next); return (an); } static void auth_name_delete(struct auth_name *an) { TAILQ_REMOVE(&an->an_auth_group->ag_names, an, an_next); free(an->an_initator_name); free(an); } bool auth_name_defined(const struct auth_group *ag) { if (TAILQ_EMPTY(&ag->ag_names)) return (false); return (true); } const struct auth_name * auth_name_find(const struct auth_group *ag, const char *name) { const struct auth_name *auth_name; TAILQ_FOREACH(auth_name, &ag->ag_names, an_next) { if (strcmp(auth_name->an_initator_name, name) == 0) return (auth_name); } return (NULL); } int auth_name_check(const struct auth_group *ag, const char *initiator_name) { if (!auth_name_defined(ag)) return (0); if (auth_name_find(ag, initiator_name) == NULL) return (1); return (0); } const struct auth_portal * auth_portal_new(struct auth_group *ag, const char *portal) { struct auth_portal *ap; char *net, *mask, *str, *tmp; int len, dm, m; ap = calloc(1, sizeof(*ap)); if (ap == NULL) log_err(1, "calloc"); 
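	/*
	 * Parse "address[/prefixlen]" into ap_sa/ap_mask below; both
	 * the bracketed IPv6 form "[2001:db8::1]/64" and the plain
	 * IPv4 form "10.0.0.0/8" are accepted (hypothetical example
	 * values), with the prefix length defaulting to the full
	 * address length when no "/" is given.
	 */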
ap->ap_auth_group = ag; ap->ap_initator_portal = checked_strdup(portal); mask = str = checked_strdup(portal); net = strsep(&mask, "/"); if (net[0] == '[') net++; len = strlen(net); if (len == 0) goto error; if (net[len - 1] == ']') net[len - 1] = 0; if (strchr(net, ':') != NULL) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ap->ap_sa; sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; if (inet_pton(AF_INET6, net, &sin6->sin6_addr) <= 0) goto error; dm = 128; } else { struct sockaddr_in *sin = (struct sockaddr_in *)&ap->ap_sa; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; if (inet_pton(AF_INET, net, &sin->sin_addr) <= 0) goto error; dm = 32; } if (mask != NULL) { m = strtol(mask, &tmp, 0); if (m < 0 || m > dm || tmp[0] != 0) goto error; } else m = dm; ap->ap_mask = m; free(str); TAILQ_INSERT_TAIL(&ag->ag_portals, ap, ap_next); return (ap); error: + free(str); free(ap); log_warnx("incorrect initiator portal \"%s\"", portal); return (NULL); } static void auth_portal_delete(struct auth_portal *ap) { TAILQ_REMOVE(&ap->ap_auth_group->ag_portals, ap, ap_next); free(ap->ap_initator_portal); free(ap); } bool auth_portal_defined(const struct auth_group *ag) { if (TAILQ_EMPTY(&ag->ag_portals)) return (false); return (true); } const struct auth_portal * auth_portal_find(const struct auth_group *ag, const struct sockaddr_storage *ss) { const struct auth_portal *ap; const uint8_t *a, *b; int i; uint8_t bmask; TAILQ_FOREACH(ap, &ag->ag_portals, ap_next) { if (ap->ap_sa.ss_family != ss->ss_family) continue; if (ss->ss_family == AF_INET) { a = (const uint8_t *) &((const struct sockaddr_in *)ss)->sin_addr; b = (const uint8_t *) &((const struct sockaddr_in *)&ap->ap_sa)->sin_addr; } else { a = (const uint8_t *) &((const struct sockaddr_in6 *)ss)->sin6_addr; b = (const uint8_t *) &((const struct sockaddr_in6 *)&ap->ap_sa)->sin6_addr; } for (i = 0; i < ap->ap_mask / 8; i++) { if (a[i] != b[i]) goto next; } if (ap->ap_mask % 8) { bmask = 0xff << (8 - (ap->ap_mask % 8)); if ((a[i] & bmask) != (b[i] & bmask)) goto next; } return (ap); next: ; } return (NULL); } int auth_portal_check(const struct auth_group *ag, const struct sockaddr_storage *sa) { if (!auth_portal_defined(ag)) return (0); if (auth_portal_find(ag, sa) == NULL) return (1); return (0); } struct auth_group * auth_group_new(struct conf *conf, const char *name) { struct auth_group *ag; if (name != NULL) { ag = auth_group_find(conf, name); if (ag != NULL) { log_warnx("duplicated auth-group \"%s\"", name); return (NULL); } } ag = calloc(1, sizeof(*ag)); if (ag == NULL) log_err(1, "calloc"); if (name != NULL) ag->ag_name = checked_strdup(name); TAILQ_INIT(&ag->ag_auths); TAILQ_INIT(&ag->ag_names); TAILQ_INIT(&ag->ag_portals); ag->ag_conf = conf; TAILQ_INSERT_TAIL(&conf->conf_auth_groups, ag, ag_next); return (ag); } void auth_group_delete(struct auth_group *ag) { struct auth *auth, *auth_tmp; struct auth_name *auth_name, *auth_name_tmp; struct auth_portal *auth_portal, *auth_portal_tmp; TAILQ_REMOVE(&ag->ag_conf->conf_auth_groups, ag, ag_next); TAILQ_FOREACH_SAFE(auth, &ag->ag_auths, a_next, auth_tmp) auth_delete(auth); TAILQ_FOREACH_SAFE(auth_name, &ag->ag_names, an_next, auth_name_tmp) auth_name_delete(auth_name); TAILQ_FOREACH_SAFE(auth_portal, &ag->ag_portals, ap_next, auth_portal_tmp) auth_portal_delete(auth_portal); free(ag->ag_name); free(ag); } struct auth_group * auth_group_find(const struct conf *conf, const char *name) { struct auth_group *ag; TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { if (ag->ag_name != 
NULL && strcmp(ag->ag_name, name) == 0) return (ag); } return (NULL); } int auth_group_set_type(struct auth_group *ag, const char *str) { int type; if (strcmp(str, "none") == 0) { type = AG_TYPE_NO_AUTHENTICATION; } else if (strcmp(str, "deny") == 0) { type = AG_TYPE_DENY; } else if (strcmp(str, "chap") == 0) { type = AG_TYPE_CHAP; } else if (strcmp(str, "chap-mutual") == 0) { type = AG_TYPE_CHAP_MUTUAL; } else { if (ag->ag_name != NULL) log_warnx("invalid auth-type \"%s\" for auth-group " "\"%s\"", str, ag->ag_name); else log_warnx("invalid auth-type \"%s\" for target " "\"%s\"", str, ag->ag_target->t_name); return (1); } if (ag->ag_type != AG_TYPE_UNKNOWN && ag->ag_type != type) { if (ag->ag_name != NULL) { log_warnx("cannot set auth-type to \"%s\" for " "auth-group \"%s\"; already has a different " "type", str, ag->ag_name); } else { log_warnx("cannot set auth-type to \"%s\" for target " "\"%s\"; already has a different type", str, ag->ag_target->t_name); } return (1); } ag->ag_type = type; return (0); } static struct portal * portal_new(struct portal_group *pg) { struct portal *portal; portal = calloc(1, sizeof(*portal)); if (portal == NULL) log_err(1, "calloc"); TAILQ_INIT(&portal->p_targets); portal->p_portal_group = pg; TAILQ_INSERT_TAIL(&pg->pg_portals, portal, p_next); return (portal); } static void portal_delete(struct portal *portal) { TAILQ_REMOVE(&portal->p_portal_group->pg_portals, portal, p_next); if (portal->p_ai != NULL) freeaddrinfo(portal->p_ai); free(portal->p_listen); free(portal); } struct portal_group * portal_group_new(struct conf *conf, const char *name) { struct portal_group *pg; pg = portal_group_find(conf, name); if (pg != NULL) { log_warnx("duplicated portal-group \"%s\"", name); return (NULL); } pg = calloc(1, sizeof(*pg)); if (pg == NULL) log_err(1, "calloc"); pg->pg_name = checked_strdup(name); TAILQ_INIT(&pg->pg_options); TAILQ_INIT(&pg->pg_portals); TAILQ_INIT(&pg->pg_ports); pg->pg_conf = conf; pg->pg_tag = 0; /* Assigned later in conf_apply(). */ TAILQ_INSERT_TAIL(&conf->conf_portal_groups, pg, pg_next); return (pg); } void portal_group_delete(struct portal_group *pg) { struct portal *portal, *tmp; struct port *port, *tport; struct option *o, *otmp; TAILQ_FOREACH_SAFE(port, &pg->pg_ports, p_pgs, tport) port_delete(port); TAILQ_REMOVE(&pg->pg_conf->conf_portal_groups, pg, pg_next); TAILQ_FOREACH_SAFE(portal, &pg->pg_portals, p_next, tmp) portal_delete(portal); TAILQ_FOREACH_SAFE(o, &pg->pg_options, o_next, otmp) option_delete(&pg->pg_options, o); free(pg->pg_name); free(pg->pg_offload); free(pg->pg_redirection); free(pg); } struct portal_group * portal_group_find(const struct conf *conf, const char *name) { struct portal_group *pg; TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (strcmp(pg->pg_name, name) == 0) return (pg); } return (NULL); } static int parse_addr_port(char *arg, const char *def_port, struct addrinfo **ai) { struct addrinfo hints; char *str, *addr, *ch; const char *port; int error, colons = 0; str = arg = strdup(arg); if (arg[0] == '[') { /* * IPv6 address in square brackets, perhaps with port. */ arg++; addr = strsep(&arg, "]"); - if (arg == NULL) + if (arg == NULL) { + free(str); return (1); + } if (arg[0] == '\0') { port = def_port; } else if (arg[0] == ':') { port = arg + 1; } else { free(str); return (1); } } else { /* * Either IPv6 address without brackets - and without * a port - or IPv4 address. Just count the colons. 
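		 * Hypothetical examples of what reaches this branch:
		 * "fe80::1" contains more than one colon and is taken
		 * as a bare IPv6 address with the default port;
		 * "10.0.0.1:3261" is split at its single colon into
		 * address and port; "10.0.0.1" has no colon and gets
		 * the default port.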
*/ for (ch = arg; *ch != '\0'; ch++) { if (*ch == ':') colons++; } if (colons > 1) { addr = arg; port = def_port; } else { addr = strsep(&arg, ":"); if (arg == NULL) port = def_port; else port = arg; } } memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_PASSIVE; error = getaddrinfo(addr, port, &hints, ai); free(str); return ((error != 0) ? 1 : 0); } int portal_group_add_listen(struct portal_group *pg, const char *value, bool iser) { struct portal *portal; portal = portal_new(pg); portal->p_listen = checked_strdup(value); portal->p_iser = iser; if (parse_addr_port(portal->p_listen, "3260", &portal->p_ai)) { log_warnx("invalid listen address %s", portal->p_listen); portal_delete(portal); return (1); } /* * XXX: getaddrinfo(3) may return multiple addresses; we should turn * those into multiple portals. */ return (0); } int isns_new(struct conf *conf, const char *addr) { struct isns *isns; isns = calloc(1, sizeof(*isns)); if (isns == NULL) log_err(1, "calloc"); isns->i_conf = conf; TAILQ_INSERT_TAIL(&conf->conf_isns, isns, i_next); isns->i_addr = checked_strdup(addr); if (parse_addr_port(isns->i_addr, "3205", &isns->i_ai)) { log_warnx("invalid iSNS address %s", isns->i_addr); isns_delete(isns); return (1); } /* * XXX: getaddrinfo(3) may return multiple addresses; we should turn * those into multiple servers. */ return (0); } void isns_delete(struct isns *isns) { TAILQ_REMOVE(&isns->i_conf->conf_isns, isns, i_next); free(isns->i_addr); if (isns->i_ai != NULL) freeaddrinfo(isns->i_ai); free(isns); } static int isns_do_connect(struct isns *isns) { int s; s = socket(isns->i_ai->ai_family, isns->i_ai->ai_socktype, isns->i_ai->ai_protocol); if (s < 0) { log_warn("socket(2) failed for %s", isns->i_addr); return (-1); } if (connect(s, isns->i_ai->ai_addr, isns->i_ai->ai_addrlen)) { log_warn("connect(2) failed for %s", isns->i_addr); close(s); return (-1); } return(s); } static int isns_do_register(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct target *target; struct portal *portal; struct portal_group *pg; struct port *port; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVATTRREG, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_delim(req); isns_req_add_str(req, 1, hostname); isns_req_add_32(req, 2, 2); /* 2 -- iSCSI */ isns_req_add_32(req, 6, conf->conf_isns_period); TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (pg->pg_unassigned) continue; TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { isns_req_add_addr(req, 16, portal->p_ai); isns_req_add_port(req, 17, portal->p_ai); } } TAILQ_FOREACH(target, &conf->conf_targets, t_next) { isns_req_add_str(req, 32, target->t_name); isns_req_add_32(req, 33, 1); /* 1 -- Target*/ if (target->t_alias != NULL) isns_req_add_str(req, 34, target->t_alias); TAILQ_FOREACH(port, &target->t_ports, p_ts) { if ((pg = port->p_portal_group) == NULL) continue; isns_req_add_32(req, 51, pg->pg_tag); TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { isns_req_add_addr(req, 49, portal->p_ai); isns_req_add_port(req, 50, portal->p_ai); } } } res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS register error %d for %s", error, isns->i_addr); res = 
-1; } quit: isns_req_free(req); return (res); } static int isns_do_check(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVATTRQRY, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_str(req, 1, hostname); isns_req_add_delim(req); isns_req_add(req, 2, 0, NULL); res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS check error %d for %s", error, isns->i_addr); res = -1; } quit: isns_req_free(req); return (res); } static int isns_do_deregister(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVDEREG, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_delim(req); isns_req_add_str(req, 1, hostname); res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS deregister error %d for %s", error, isns->i_addr); res = -1; } quit: isns_req_free(req); return (res); } void isns_register(struct isns *isns, struct isns *oldisns) { struct conf *conf = isns->i_conf; int s; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) { set_timeout(0, false); return; } gethostname(hostname, sizeof(hostname)); if (oldisns == NULL || TAILQ_EMPTY(&oldisns->i_conf->conf_targets)) oldisns = isns; isns_do_deregister(oldisns, s, hostname); isns_do_register(isns, s, hostname); close(s); set_timeout(0, false); } void isns_check(struct isns *isns) { struct conf *conf = isns->i_conf; int s, res; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) { set_timeout(0, false); return; } gethostname(hostname, sizeof(hostname)); res = isns_do_check(isns, s, hostname); if (res < 0) { isns_do_deregister(isns, s, hostname); isns_do_register(isns, s, hostname); } close(s); set_timeout(0, false); } void isns_deregister(struct isns *isns) { struct conf *conf = isns->i_conf; int s; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) { /* Disarm the timeout on failure, as the other isns_*() callers do. */ set_timeout(0, false); return; } gethostname(hostname, sizeof(hostname)); isns_do_deregister(isns, s, hostname); close(s); set_timeout(0, false); } int portal_group_set_filter(struct portal_group *pg, const char *str) { int filter; if (strcmp(str, "none") == 0) { filter = PG_FILTER_NONE; } else if (strcmp(str, "portal") == 0) { filter = PG_FILTER_PORTAL; } else if (strcmp(str, "portal-name") == 0) { filter = PG_FILTER_PORTAL_NAME; } else if (strcmp(str, "portal-name-auth") == 0) { filter = PG_FILTER_PORTAL_NAME_AUTH; } else { log_warnx("invalid discovery-filter \"%s\" for portal-group " "\"%s\"; valid values are \"none\", \"portal\", 
"\"portal-name\", and \"portal-name-auth\"", str, pg->pg_name); return (1); } if (pg->pg_discovery_filter != PG_FILTER_UNKNOWN && pg->pg_discovery_filter != filter) { log_warnx("cannot set discovery-filter to \"%s\" for " "portal-group \"%s\"; already has a different " "value", str, pg->pg_name); return (1); } pg->pg_discovery_filter = filter; return (0); } int portal_group_set_offload(struct portal_group *pg, const char *offload) { if (pg->pg_offload != NULL) { log_warnx("cannot set offload to \"%s\" for " "portal-group \"%s\"; already defined", offload, pg->pg_name); return (1); } pg->pg_offload = checked_strdup(offload); return (0); } int portal_group_set_redirection(struct portal_group *pg, const char *addr) { if (pg->pg_redirection != NULL) { log_warnx("cannot set redirection to \"%s\" for " "portal-group \"%s\"; already defined", addr, pg->pg_name); return (1); } pg->pg_redirection = checked_strdup(addr); return (0); } static bool valid_hex(const char ch) { switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': return (true); default: return (false); } } bool valid_iscsi_name(const char *name) { int i; if (strlen(name) >= MAX_NAME_LEN) { log_warnx("overlong name for target \"%s\"; max length allowed " "by iSCSI specification is %d characters", name, MAX_NAME_LEN); return (false); } /* * In the cases below, we don't return an error, just in case the admin * was right, and we're wrong. */ if (strncasecmp(name, "iqn.", strlen("iqn.")) == 0) { for (i = strlen("iqn."); name[i] != '\0'; i++) { /* * XXX: We should verify UTF-8 normalisation, as defined * by 3.2.6.2: iSCSI Name Encoding. */ if (isalnum(name[i])) continue; if (name[i] == '-' || name[i] == '.' || name[i] == ':') continue; log_warnx("invalid character \"%c\" in target name " "\"%s\"; allowed characters are letters, digits, " "'-', '.', and ':'", name[i], name); break; } /* * XXX: Check more stuff: valid date and a valid reversed domain. 
*/ } else if (strncasecmp(name, "eui.", strlen("eui.")) == 0) { if (strlen(name) != strlen("eui.") + 16) log_warnx("invalid target name \"%s\"; the \"eui.\" " "should be followed by exactly 16 hexadecimal " "digits", name); for (i = strlen("eui."); name[i] != '\0'; i++) { if (!valid_hex(name[i])) { log_warnx("invalid character \"%c\" in target " "name \"%s\"; allowed characters are 0-9 " "and A-F", name[i], name); break; } } } else if (strncasecmp(name, "naa.", strlen("naa.")) == 0) { if (strlen(name) > strlen("naa.") + 32) log_warnx("invalid target name \"%s\"; the \"naa.\" " "should be followed by at most 32 hexadecimal " "digits", name); for (i = strlen("naa."); name[i] != '\0'; i++) { if (!valid_hex(name[i])) { log_warnx("invalid character \"%c\" in target " "name \"%s\"; allowed characters are 0-9 " "and A-F", name[i], name); break; } } } else { log_warnx("invalid target name \"%s\"; should start with " "either \"iqn.\", \"eui.\", or \"naa.\"", name); } return (true); } struct pport * pport_new(struct conf *conf, const char *name, uint32_t ctl_port) { struct pport *pp; pp = calloc(1, sizeof(*pp)); if (pp == NULL) log_err(1, "calloc"); pp->pp_conf = conf; pp->pp_name = checked_strdup(name); pp->pp_ctl_port = ctl_port; TAILQ_INIT(&pp->pp_ports); TAILQ_INSERT_TAIL(&conf->conf_pports, pp, pp_next); return (pp); } struct pport * pport_find(const struct conf *conf, const char *name) { struct pport *pp; TAILQ_FOREACH(pp, &conf->conf_pports, pp_next) { if (strcasecmp(pp->pp_name, name) == 0) return (pp); } return (NULL); } struct pport * pport_copy(struct pport *pp, struct conf *conf) { struct pport *ppnew; ppnew = pport_new(conf, pp->pp_name, pp->pp_ctl_port); return (ppnew); } void pport_delete(struct pport *pp) { struct port *port, *tport; TAILQ_FOREACH_SAFE(port, &pp->pp_ports, p_ts, tport) port_delete(port); TAILQ_REMOVE(&pp->pp_conf->conf_pports, pp, pp_next); free(pp->pp_name); free(pp); } struct port * port_new(struct conf *conf, struct target *target, struct portal_group *pg) { struct port *port; char *name; int ret; ret = asprintf(&name, "%s-%s", pg->pg_name, target->t_name); if (ret <= 0) log_err(1, "asprintf"); if (port_find(conf, name) != NULL) { log_warnx("duplicate port \"%s\"", name); free(name); return (NULL); } port = calloc(1, sizeof(*port)); if (port == NULL) log_err(1, "calloc"); port->p_conf = conf; port->p_name = name; TAILQ_INSERT_TAIL(&conf->conf_ports, port, p_next); TAILQ_INSERT_TAIL(&target->t_ports, port, p_ts); port->p_target = target; TAILQ_INSERT_TAIL(&pg->pg_ports, port, p_pgs); port->p_portal_group = pg; port->p_foreign = pg->pg_foreign; return (port); } struct port * port_new_pp(struct conf *conf, struct target *target, struct pport *pp) { struct port *port; char *name; int ret; ret = asprintf(&name, "%s-%s", pp->pp_name, target->t_name); if (ret <= 0) log_err(1, "asprintf"); if (port_find(conf, name) != NULL) { log_warnx("duplicate port \"%s\"", name); free(name); return (NULL); } port = calloc(1, sizeof(*port)); if (port == NULL) log_err(1, "calloc"); port->p_conf = conf; port->p_name = name; TAILQ_INSERT_TAIL(&conf->conf_ports, port, p_next); TAILQ_INSERT_TAIL(&target->t_ports, port, p_ts); port->p_target = target; TAILQ_INSERT_TAIL(&pp->pp_ports, port, p_pps); port->p_pport = pp; return (port); } struct port * port_find(const struct conf *conf, const char *name) { struct port *port; TAILQ_FOREACH(port, &conf->conf_ports, p_next) { if (strcasecmp(port->p_name, name) == 0) return (port); } return (NULL); } struct port * port_find_in_pg(const struct 
portal_group *pg, const char *target) { struct port *port; TAILQ_FOREACH(port, &pg->pg_ports, p_pgs) { if (strcasecmp(port->p_target->t_name, target) == 0) return (port); } return (NULL); } void port_delete(struct port *port) { if (port->p_portal_group) TAILQ_REMOVE(&port->p_portal_group->pg_ports, port, p_pgs); if (port->p_pport) TAILQ_REMOVE(&port->p_pport->pp_ports, port, p_pps); if (port->p_target) TAILQ_REMOVE(&port->p_target->t_ports, port, p_ts); TAILQ_REMOVE(&port->p_conf->conf_ports, port, p_next); free(port->p_name); free(port); } struct target * target_new(struct conf *conf, const char *name) { struct target *targ; int i, len; targ = target_find(conf, name); if (targ != NULL) { log_warnx("duplicated target \"%s\"", name); return (NULL); } if (valid_iscsi_name(name) == false) { log_warnx("target name \"%s\" is invalid", name); return (NULL); } targ = calloc(1, sizeof(*targ)); if (targ == NULL) log_err(1, "calloc"); targ->t_name = checked_strdup(name); /* * RFC 3722 requires us to normalize the name to lowercase. */ len = strlen(name); for (i = 0; i < len; i++) targ->t_name[i] = tolower(targ->t_name[i]); targ->t_conf = conf; TAILQ_INIT(&targ->t_ports); TAILQ_INSERT_TAIL(&conf->conf_targets, targ, t_next); return (targ); } void target_delete(struct target *targ) { struct port *port, *tport; TAILQ_FOREACH_SAFE(port, &targ->t_ports, p_ts, tport) port_delete(port); TAILQ_REMOVE(&targ->t_conf->conf_targets, targ, t_next); free(targ->t_name); free(targ->t_redirection); free(targ); } struct target * target_find(struct conf *conf, const char *name) { struct target *targ; TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (strcasecmp(targ->t_name, name) == 0) return (targ); } return (NULL); } int target_set_redirection(struct target *target, const char *addr) { if (target->t_redirection != NULL) { log_warnx("cannot set redirection to \"%s\" for " "target \"%s\"; already defined", addr, target->t_name); return (1); } target->t_redirection = checked_strdup(addr); return (0); } struct lun * lun_new(struct conf *conf, const char *name) { struct lun *lun; lun = lun_find(conf, name); if (lun != NULL) { log_warnx("duplicated lun \"%s\"", name); return (NULL); } lun = calloc(1, sizeof(*lun)); if (lun == NULL) log_err(1, "calloc"); lun->l_conf = conf; lun->l_name = checked_strdup(name); TAILQ_INIT(&lun->l_options); TAILQ_INSERT_TAIL(&conf->conf_luns, lun, l_next); lun->l_ctl_lun = -1; return (lun); } void lun_delete(struct lun *lun) { struct target *targ; struct option *o, *tmp; int i; TAILQ_FOREACH(targ, &lun->l_conf->conf_targets, t_next) { for (i = 0; i < MAX_LUNS; i++) { if (targ->t_luns[i] == lun) targ->t_luns[i] = NULL; } } TAILQ_REMOVE(&lun->l_conf->conf_luns, lun, l_next); TAILQ_FOREACH_SAFE(o, &lun->l_options, o_next, tmp) option_delete(&lun->l_options, o); free(lun->l_name); free(lun->l_backend); free(lun->l_device_id); free(lun->l_path); free(lun->l_scsiname); free(lun->l_serial); free(lun); } struct lun * lun_find(const struct conf *conf, const char *name) { struct lun *lun; TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { if (strcmp(lun->l_name, name) == 0) return (lun); } return (NULL); } void lun_set_backend(struct lun *lun, const char *value) { free(lun->l_backend); lun->l_backend = checked_strdup(value); } void lun_set_blocksize(struct lun *lun, size_t value) { lun->l_blocksize = value; } void lun_set_device_type(struct lun *lun, uint8_t value) { lun->l_device_type = value; } void lun_set_device_id(struct lun *lun, const char *value) { free(lun->l_device_id); lun->l_device_id = 
checked_strdup(value); } void lun_set_path(struct lun *lun, const char *value) { free(lun->l_path); lun->l_path = checked_strdup(value); } void lun_set_scsiname(struct lun *lun, const char *value) { free(lun->l_scsiname); lun->l_scsiname = checked_strdup(value); } void lun_set_serial(struct lun *lun, const char *value) { free(lun->l_serial); lun->l_serial = checked_strdup(value); } void lun_set_size(struct lun *lun, size_t value) { lun->l_size = value; } void lun_set_ctl_lun(struct lun *lun, uint32_t value) { lun->l_ctl_lun = value; } struct option * option_new(struct options *options, const char *name, const char *value) { struct option *o; o = option_find(options, name); if (o != NULL) { log_warnx("duplicated option \"%s\"", name); return (NULL); } o = calloc(1, sizeof(*o)); if (o == NULL) log_err(1, "calloc"); o->o_name = checked_strdup(name); o->o_value = checked_strdup(value); TAILQ_INSERT_TAIL(options, o, o_next); return (o); } void option_delete(struct options *options, struct option *o) { TAILQ_REMOVE(options, o, o_next); free(o->o_name); free(o->o_value); free(o); } struct option * option_find(const struct options *options, const char *name) { struct option *o; TAILQ_FOREACH(o, options, o_next) { if (strcmp(o->o_name, name) == 0) return (o); } return (NULL); } void option_set(struct option *o, const char *value) { free(o->o_value); o->o_value = checked_strdup(value); } static struct connection * connection_new(struct portal *portal, int fd, const char *host, const struct sockaddr *client_sa) { struct connection *conn; conn = calloc(1, sizeof(*conn)); if (conn == NULL) log_err(1, "calloc"); conn->conn_portal = portal; conn->conn_socket = fd; conn->conn_initiator_addr = checked_strdup(host); memcpy(&conn->conn_initiator_sa, client_sa, client_sa->sa_len); /* * Default values, from RFC 3720, section 12. 
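 * These are the declared defaults for MaxRecvDataSegmentLength (8192), MaxBurstLength (262144), FirstBurstLength (65536), and ImmediateData (Yes); login negotiation may override them later.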
*/ conn->conn_max_recv_data_segment_length = 8192; conn->conn_max_burst_length = 262144; conn->conn_first_burst_length = 65536; conn->conn_immediate_data = true; return (conn); } #if 0 static void conf_print(struct conf *conf) { struct auth_group *ag; struct auth *auth; struct auth_name *auth_name; struct auth_portal *auth_portal; struct portal_group *pg; struct portal *portal; struct target *targ; struct lun *lun; struct option *o; TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { fprintf(stderr, "auth-group %s {\n", ag->ag_name); TAILQ_FOREACH(auth, &ag->ag_auths, a_next) fprintf(stderr, "\t chap-mutual %s %s %s %s\n", auth->a_user, auth->a_secret, auth->a_mutual_user, auth->a_mutual_secret); TAILQ_FOREACH(auth_name, &ag->ag_names, an_next) fprintf(stderr, "\t initiator-name %s\n", auth_name->an_initator_name); TAILQ_FOREACH(auth_portal, &ag->ag_portals, an_next) fprintf(stderr, "\t initiator-portal %s\n", auth_portal->an_initator_portal); fprintf(stderr, "}\n"); } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { fprintf(stderr, "portal-group %s {\n", pg->pg_name); TAILQ_FOREACH(portal, &pg->pg_portals, p_next) fprintf(stderr, "\t listen %s\n", portal->p_listen); fprintf(stderr, "}\n"); } TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { fprintf(stderr, "\tlun %s {\n", lun->l_name); fprintf(stderr, "\t\tpath %s\n", lun->l_path); TAILQ_FOREACH(o, &lun->l_options, o_next) fprintf(stderr, "\t\toption %s %s\n", o->o_name, o->o_value); fprintf(stderr, "\t}\n"); } TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { fprintf(stderr, "target %s {\n", targ->t_name); if (targ->t_alias != NULL) fprintf(stderr, "\t alias %s\n", targ->t_alias); fprintf(stderr, "}\n"); } } #endif static int conf_verify_lun(struct lun *lun) { const struct lun *lun2; if (lun->l_backend == NULL) lun_set_backend(lun, "block"); if (strcmp(lun->l_backend, "block") == 0) { if (lun->l_path == NULL) { log_warnx("missing path for lun \"%s\"", lun->l_name); return (1); } } else if (strcmp(lun->l_backend, "ramdisk") == 0) { if (lun->l_size == 0) { log_warnx("missing size for ramdisk-backed lun \"%s\"", lun->l_name); return (1); } if (lun->l_path != NULL) { log_warnx("path must not be specified " "for ramdisk-backed lun \"%s\"", lun->l_name); return (1); } } if (lun->l_blocksize == 0) { if (lun->l_device_type == 5) lun_set_blocksize(lun, DEFAULT_CD_BLOCKSIZE); else lun_set_blocksize(lun, DEFAULT_BLOCKSIZE); } else if (lun->l_blocksize < 0) { log_warnx("invalid blocksize for lun \"%s\"; " "must be larger than 0", lun->l_name); return (1); } if (lun->l_size != 0 && lun->l_size % lun->l_blocksize != 0) { log_warnx("invalid size for lun \"%s\"; " "must be multiple of blocksize", lun->l_name); return (1); } TAILQ_FOREACH(lun2, &lun->l_conf->conf_luns, l_next) { if (lun == lun2) continue; if (lun->l_path != NULL && lun2->l_path != NULL && strcmp(lun->l_path, lun2->l_path) == 0) { log_debugx("WARNING: path \"%s\" duplicated " "between lun \"%s\", and " "lun \"%s\"", lun->l_path, lun->l_name, lun2->l_name); } } return (0); } int conf_verify(struct conf *conf) { struct auth_group *ag; struct portal_group *pg; struct port *port; struct target *targ; struct lun *lun; bool found; int error, i; if (conf->conf_pidfile_path == NULL) conf->conf_pidfile_path = checked_strdup(DEFAULT_PIDFILE); TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { error = conf_verify_lun(lun); if (error != 0) return (error); } TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (targ->t_auth_group == NULL) { targ->t_auth_group = auth_group_find(conf, "default"); 
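/* The "default" auth-group is created in conf_new_from_file() before parsing, so this lookup is expected to succeed. */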
assert(targ->t_auth_group != NULL); } if (TAILQ_EMPTY(&targ->t_ports)) { pg = portal_group_find(conf, "default"); assert(pg != NULL); port_new(conf, targ, pg); } found = false; for (i = 0; i < MAX_LUNS; i++) { if (targ->t_luns[i] != NULL) found = true; } if (!found && targ->t_redirection == NULL) { log_warnx("no LUNs defined for target \"%s\"", targ->t_name); } if (found && targ->t_redirection != NULL) { log_debugx("target \"%s\" contains LUNs, " "but configured for redirection", targ->t_name); } } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { assert(pg->pg_name != NULL); if (pg->pg_discovery_auth_group == NULL) { pg->pg_discovery_auth_group = auth_group_find(conf, "default"); assert(pg->pg_discovery_auth_group != NULL); } if (pg->pg_discovery_filter == PG_FILTER_UNKNOWN) pg->pg_discovery_filter = PG_FILTER_NONE; if (pg->pg_redirection != NULL) { if (!TAILQ_EMPTY(&pg->pg_ports)) { log_debugx("portal-group \"%s\" assigned " "to target, but configured " "for redirection", pg->pg_name); } pg->pg_unassigned = false; } else if (!TAILQ_EMPTY(&pg->pg_ports)) { pg->pg_unassigned = false; } else { if (strcmp(pg->pg_name, "default") != 0) log_warnx("portal-group \"%s\" not assigned " "to any target", pg->pg_name); pg->pg_unassigned = true; } } TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { if (ag->ag_name == NULL) assert(ag->ag_target != NULL); else assert(ag->ag_target == NULL); found = false; TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (targ->t_auth_group == ag) { found = true; break; } } TAILQ_FOREACH(port, &conf->conf_ports, p_next) { if (port->p_auth_group == ag) { found = true; break; } } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (pg->pg_discovery_auth_group == ag) { found = true; break; } } if (!found && ag->ag_name != NULL && strcmp(ag->ag_name, "default") != 0 && strcmp(ag->ag_name, "no-authentication") != 0 && strcmp(ag->ag_name, "no-access") != 0) { log_warnx("auth-group \"%s\" not assigned " "to any target", ag->ag_name); } } return (0); } static int conf_apply(struct conf *oldconf, struct conf *newconf) { struct lun *oldlun, *newlun, *tmplun; struct portal_group *oldpg, *newpg; struct portal *oldp, *newp; struct port *oldport, *newport, *tmpport; struct isns *oldns, *newns; pid_t otherpid; int changed, cumulated_error = 0, error, sockbuf; int one = 1; if (oldconf->conf_debug != newconf->conf_debug) { log_debugx("changing debug level to %d", newconf->conf_debug); log_init(newconf->conf_debug); } if (oldconf->conf_pidfh != NULL) { assert(oldconf->conf_pidfile_path != NULL); if (newconf->conf_pidfile_path != NULL && strcmp(oldconf->conf_pidfile_path, newconf->conf_pidfile_path) == 0) { newconf->conf_pidfh = oldconf->conf_pidfh; oldconf->conf_pidfh = NULL; } else { log_debugx("removing pidfile %s", oldconf->conf_pidfile_path); pidfile_remove(oldconf->conf_pidfh); oldconf->conf_pidfh = NULL; } } if (newconf->conf_pidfh == NULL && newconf->conf_pidfile_path != NULL) { log_debugx("opening pidfile %s", newconf->conf_pidfile_path); newconf->conf_pidfh = pidfile_open(newconf->conf_pidfile_path, 0600, &otherpid); if (newconf->conf_pidfh == NULL) { if (errno == EEXIST) log_errx(1, "daemon already running, pid: %jd.", (intmax_t)otherpid); log_err(1, "cannot open or create pidfile \"%s\"", newconf->conf_pidfile_path); } } /* * Go through the new portal groups, assigning tags or preserving old. 
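 * Reusing the old tag keeps the portal group tag (TPGT) that initiators have already seen stable across configuration reloads.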
*/ TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) { if (newpg->pg_tag != 0) continue; oldpg = portal_group_find(oldconf, newpg->pg_name); if (oldpg != NULL) newpg->pg_tag = oldpg->pg_tag; else newpg->pg_tag = ++last_portal_group_tag; } /* Deregister on removed iSNS servers. */ TAILQ_FOREACH(oldns, &oldconf->conf_isns, i_next) { TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) { if (strcmp(oldns->i_addr, newns->i_addr) == 0) break; } if (newns == NULL) isns_deregister(oldns); } /* * XXX: If target or lun removal fails, we should somehow "move" * the old lun or target into newconf, so that subsequent * conf_apply() would try to remove them again. That would * be somewhat hairy, though, and lun deletion failures don't * really happen, so leave it as it is for now. */ /* * First, remove any ports present in the old configuration * and missing in the new one. */ TAILQ_FOREACH_SAFE(oldport, &oldconf->conf_ports, p_next, tmpport) { if (oldport->p_foreign) continue; newport = port_find(newconf, oldport->p_name); if (newport != NULL && !newport->p_foreign) continue; log_debugx("removing port \"%s\"", oldport->p_name); error = kernel_port_remove(oldport); if (error != 0) { log_warnx("failed to remove port %s", oldport->p_name); /* * XXX: Uncomment after fixing the root cause. * * cumulated_error++; */ } } /* * Second, remove any LUNs present in the old configuration * and missing in the new one. */ TAILQ_FOREACH_SAFE(oldlun, &oldconf->conf_luns, l_next, tmplun) { newlun = lun_find(newconf, oldlun->l_name); if (newlun == NULL) { log_debugx("lun \"%s\", CTL lun %d " "not found in new configuration; " "removing", oldlun->l_name, oldlun->l_ctl_lun); error = kernel_lun_remove(oldlun); if (error != 0) { log_warnx("failed to remove lun \"%s\", " "CTL lun %d", oldlun->l_name, oldlun->l_ctl_lun); cumulated_error++; } continue; } /* * Also remove the LUNs changed by more than size. 
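 * A change of size alone can be applied in place by kernel_lun_modify() below; a change of backend, blocksize, device-id, path, or serial requires the LUN to be removed and recreated.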
*/ changed = 0; assert(oldlun->l_backend != NULL); assert(newlun->l_backend != NULL); if (strcmp(newlun->l_backend, oldlun->l_backend) != 0) { log_debugx("backend for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (oldlun->l_blocksize != newlun->l_blocksize) { log_debugx("blocksize for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_device_id != NULL && (oldlun->l_device_id == NULL || strcmp(oldlun->l_device_id, newlun->l_device_id) != 0)) { log_debugx("device-id for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_path != NULL && (oldlun->l_path == NULL || strcmp(oldlun->l_path, newlun->l_path) != 0)) { log_debugx("path for lun \"%s\", " "CTL lun %d, changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_serial != NULL && (oldlun->l_serial == NULL || strcmp(oldlun->l_serial, newlun->l_serial) != 0)) { log_debugx("serial for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (changed) { error = kernel_lun_remove(oldlun); if (error != 0) { log_warnx("failed to remove lun \"%s\", " "CTL lun %d", oldlun->l_name, oldlun->l_ctl_lun); cumulated_error++; } lun_delete(oldlun); continue; } lun_set_ctl_lun(newlun, oldlun->l_ctl_lun); } TAILQ_FOREACH_SAFE(newlun, &newconf->conf_luns, l_next, tmplun) { oldlun = lun_find(oldconf, newlun->l_name); if (oldlun != NULL) { log_debugx("modifying lun \"%s\", CTL lun %d", newlun->l_name, newlun->l_ctl_lun); error = kernel_lun_modify(newlun); if (error != 0) { log_warnx("failed to " "modify lun \"%s\", CTL lun %d", newlun->l_name, newlun->l_ctl_lun); cumulated_error++; } continue; } log_debugx("adding lun \"%s\"", newlun->l_name); error = kernel_lun_add(newlun); if (error != 0) { log_warnx("failed to add lun \"%s\"", newlun->l_name); lun_delete(newlun); cumulated_error++; } } /* * Now add new ports or modify existing ones. */ TAILQ_FOREACH(newport, &newconf->conf_ports, p_next) { if (newport->p_foreign) continue; oldport = port_find(oldconf, newport->p_name); if (oldport == NULL || oldport->p_foreign) { log_debugx("adding port \"%s\"", newport->p_name); error = kernel_port_add(newport); } else { log_debugx("updating port \"%s\"", newport->p_name); newport->p_ctl_port = oldport->p_ctl_port; error = kernel_port_update(newport, oldport); } if (error != 0) { log_warnx("failed to %s port %s", (oldport == NULL) ? "add" : "update", newport->p_name); /* * XXX: Uncomment after fixing the root cause. * * cumulated_error++; */ } } /* * Go through the new portals, opening the sockets as necessary. */ TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) { if (newpg->pg_foreign) continue; if (newpg->pg_unassigned) { log_debugx("not listening on portal-group \"%s\", " "not assigned to any target", newpg->pg_name); continue; } TAILQ_FOREACH(newp, &newpg->pg_portals, p_next) { /* * Try to find already open portal and reuse * the listening socket. We don't care about * what portal or portal group that was, what * matters is the listening address. */ TAILQ_FOREACH(oldpg, &oldconf->conf_portal_groups, pg_next) { TAILQ_FOREACH(oldp, &oldpg->pg_portals, p_next) { if (strcmp(newp->p_listen, oldp->p_listen) == 0 && oldp->p_socket > 0) { newp->p_socket = oldp->p_socket; oldp->p_socket = 0; break; } } } if (newp->p_socket > 0) { /* * We're done with this portal. 
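 * Its listening socket was taken over from the old configuration, so no new socket has to be opened or bound.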
*/ continue; } #ifdef ICL_KERNEL_PROXY if (proxy_mode) { newpg->pg_conf->conf_portal_id++; newp->p_id = newpg->pg_conf->conf_portal_id; log_debugx("listening on %s, portal-group " "\"%s\", portal id %d, using ICL proxy", newp->p_listen, newpg->pg_name, newp->p_id); kernel_listen(newp->p_ai, newp->p_iser, newp->p_id); continue; } #endif assert(proxy_mode == false); assert(newp->p_iser == false); log_debugx("listening on %s, portal-group \"%s\"", newp->p_listen, newpg->pg_name); newp->p_socket = socket(newp->p_ai->ai_family, newp->p_ai->ai_socktype, newp->p_ai->ai_protocol); if (newp->p_socket < 0) { log_warn("socket(2) failed for %s", newp->p_listen); cumulated_error++; continue; } sockbuf = SOCKBUF_SIZE; if (setsockopt(newp->p_socket, SOL_SOCKET, SO_RCVBUF, &sockbuf, sizeof(sockbuf)) == -1) log_warn("setsockopt(SO_RCVBUF) failed " "for %s", newp->p_listen); sockbuf = SOCKBUF_SIZE; if (setsockopt(newp->p_socket, SOL_SOCKET, SO_SNDBUF, &sockbuf, sizeof(sockbuf)) == -1) log_warn("setsockopt(SO_SNDBUF) failed " "for %s", newp->p_listen); error = setsockopt(newp->p_socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); if (error != 0) { log_warn("setsockopt(SO_REUSEADDR) failed " "for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } error = bind(newp->p_socket, newp->p_ai->ai_addr, newp->p_ai->ai_addrlen); if (error != 0) { log_warn("bind(2) failed for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } error = listen(newp->p_socket, -1); if (error != 0) { log_warn("listen(2) failed for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } } } /* * Go through the no longer used sockets, closing them. */ TAILQ_FOREACH(oldpg, &oldconf->conf_portal_groups, pg_next) { TAILQ_FOREACH(oldp, &oldpg->pg_portals, p_next) { if (oldp->p_socket <= 0) continue; log_debugx("closing socket for %s, portal-group \"%s\"", oldp->p_listen, oldpg->pg_name); close(oldp->p_socket); oldp->p_socket = 0; } } /* (Re-)Register on remaining/new iSNS servers. */ TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) { TAILQ_FOREACH(oldns, &oldconf->conf_isns, i_next) { if (strcmp(oldns->i_addr, newns->i_addr) == 0) break; } isns_register(newns, oldns); } /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) set_timeout((newconf->conf_isns_period + 2) / 3, false); return (cumulated_error); } bool timed_out(void) { return (sigalrm_received); } static void sigalrm_handler_fatal(int dummy __unused) { /* * It would be easiest to just log an error and exit. We can't * do this, though, because log_errx() is not signal safe, since * it calls syslog(3). Instead, set a flag checked by pdu_send() * and pdu_receive(), to call log_errx() there. Should they fail * to notice, we'll exit here one second later. */ if (sigalrm_received) { /* * Oh well. Just give up and quit. 
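 * _exit(2) is async-signal-safe; exit(3) is not, as it may run atexit(3) handlers and stdio cleanup from signal context.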
_exit(2); } sigalrm_received = true; } static void sigalrm_handler(int dummy __unused) { sigalrm_received = true; } void set_timeout(int timeout, int fatal) { struct sigaction sa; struct itimerval itv; int error; if (timeout <= 0) { log_debugx("session timeout disabled"); bzero(&itv, sizeof(itv)); error = setitimer(ITIMER_REAL, &itv, NULL); if (error != 0) log_err(1, "setitimer"); sigalrm_received = false; return; } sigalrm_received = false; bzero(&sa, sizeof(sa)); if (fatal) sa.sa_handler = sigalrm_handler_fatal; else sa.sa_handler = sigalrm_handler; sigfillset(&sa.sa_mask); error = sigaction(SIGALRM, &sa, NULL); if (error != 0) log_err(1, "sigaction"); /* * First SIGALRM will arrive after conf_timeout seconds. * If we do nothing, another one will arrive a second later. */ log_debugx("setting session timeout to %d seconds", timeout); bzero(&itv, sizeof(itv)); itv.it_interval.tv_sec = 1; itv.it_value.tv_sec = timeout; error = setitimer(ITIMER_REAL, &itv, NULL); if (error != 0) log_err(1, "setitimer"); } static int wait_for_children(bool block) { pid_t pid; int status; int num = 0; for (;;) { /* * If "block" is true, wait for at least one process. */ if (block && num == 0) pid = wait4(-1, &status, 0, NULL); else pid = wait4(-1, &status, WNOHANG, NULL); if (pid <= 0) break; if (WIFSIGNALED(status)) { log_warnx("child process %d terminated with signal %d", pid, WTERMSIG(status)); } else if (WEXITSTATUS(status) != 0) { log_warnx("child process %d terminated with exit status %d", pid, WEXITSTATUS(status)); } else { log_debugx("child process %d terminated gracefully", pid); } num++; } return (num); } static void handle_connection(struct portal *portal, int fd, const struct sockaddr *client_sa, bool dont_fork) { struct connection *conn; int error; pid_t pid; char host[NI_MAXHOST + 1]; struct conf *conf; conf = portal->p_portal_group->pg_conf; if (dont_fork) { log_debugx("incoming connection; not forking due to -d flag"); } else { nchildren -= wait_for_children(false); assert(nchildren >= 0); while (conf->conf_maxproc > 0 && nchildren >= conf->conf_maxproc) { log_debugx("maxproc limit of %d child processes hit; " "waiting for child process to exit", conf->conf_maxproc); nchildren -= wait_for_children(true); assert(nchildren >= 0); } log_debugx("incoming connection; forking child process #%d", nchildren); nchildren++; pid = fork(); if (pid < 0) log_err(1, "fork"); if (pid > 0) { close(fd); return; } } pidfile_close(conf->conf_pidfh); error = getnameinfo(client_sa, client_sa->sa_len, host, sizeof(host), NULL, 0, NI_NUMERICHOST); if (error != 0) log_errx(1, "getnameinfo: %s", gai_strerror(error)); log_debugx("accepted connection from %s; portal group \"%s\"", host, portal->p_portal_group->pg_name); log_set_peer_addr(host); setproctitle("%s", host); conn = connection_new(portal, fd, host, client_sa); set_timeout(conf->conf_timeout, true); kernel_capsicate(); login(conn); if (conn->conn_session_type == CONN_SESSION_TYPE_NORMAL) { kernel_handoff(conn); log_debugx("connection handed off to the kernel"); } else { assert(conn->conn_session_type == CONN_SESSION_TYPE_DISCOVERY); discovery(conn); } log_debugx("nothing more to do; exiting"); exit(0); } static int fd_add(int fd, fd_set *fdset, int nfds) { /* * Skip sockets which we failed to bind. 
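 * conf_apply() resets p_socket to 0 whenever socket setup fails, so a non-positive descriptor here simply means there is nothing to poll.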
*/ if (fd <= 0) return (nfds); FD_SET(fd, fdset); if (fd > nfds) nfds = fd; return (nfds); } static void main_loop(struct conf *conf, bool dont_fork) { struct portal_group *pg; struct portal *portal; struct sockaddr_storage client_sa; socklen_t client_salen; #ifdef ICL_KERNEL_PROXY int connection_id; int portal_id; #endif fd_set fdset; int error, nfds, client_fd; pidfile_write(conf->conf_pidfh); for (;;) { if (sighup_received || sigterm_received || timed_out()) return; #ifdef ICL_KERNEL_PROXY if (proxy_mode) { client_salen = sizeof(client_sa); kernel_accept(&connection_id, &portal_id, (struct sockaddr *)&client_sa, &client_salen); assert(client_salen >= client_sa.ss_len); log_debugx("incoming connection, id %d, portal id %d", connection_id, portal_id); TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { if (portal->p_id == portal_id) { goto found; } } } log_errx(1, "kernel returned invalid portal_id %d", portal_id); found: handle_connection(portal, connection_id, (struct sockaddr *)&client_sa, dont_fork); } else { #endif assert(proxy_mode == false); FD_ZERO(&fdset); nfds = 0; TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) nfds = fd_add(portal->p_socket, &fdset, nfds); } error = select(nfds + 1, &fdset, NULL, NULL, NULL); if (error <= 0) { if (errno == EINTR) return; log_err(1, "select"); } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { if (!FD_ISSET(portal->p_socket, &fdset)) continue; client_salen = sizeof(client_sa); client_fd = accept(portal->p_socket, (struct sockaddr *)&client_sa, &client_salen); if (client_fd < 0) { if (errno == ECONNABORTED) continue; log_err(1, "accept"); } assert(client_salen >= client_sa.ss_len); handle_connection(portal, client_fd, (struct sockaddr *)&client_sa, dont_fork); break; } } #ifdef ICL_KERNEL_PROXY } #endif } } static void sighup_handler(int dummy __unused) { sighup_received = true; } static void sigterm_handler(int dummy __unused) { sigterm_received = true; } static void sigchld_handler(int dummy __unused) { /* * The only purpose of this handler is to make SIGCHLD * interrupt the ISCSIDWAIT ioctl(2), so we can call * wait_for_children(). */ } static void register_signals(void) { struct sigaction sa; int error; bzero(&sa, sizeof(sa)); sa.sa_handler = sighup_handler; sigfillset(&sa.sa_mask); error = sigaction(SIGHUP, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigterm_handler; error = sigaction(SIGTERM, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigterm_handler; error = sigaction(SIGINT, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigchld_handler; error = sigaction(SIGCHLD, &sa, NULL); if (error != 0) log_err(1, "sigaction"); } static void check_perms(const char *path) { struct stat sb; int error; error = stat(path, &sb); if (error != 0) { log_warn("stat"); return; } if (sb.st_mode & S_IWOTH) { log_warnx("%s is world-writable", path); } else if (sb.st_mode & S_IROTH) { log_warnx("%s is world-readable", path); } else if (sb.st_mode & S_IXOTH) { /* * Ok, this one doesn't matter, but still do it, * just for consistency. */ log_warnx("%s is world-executable", path); } /* * XXX: Should we also check for owner != 0? 
*/ } static struct conf * conf_new_from_file(const char *path, struct conf *oldconf, bool ucl) { struct conf *conf; struct auth_group *ag; struct portal_group *pg; struct pport *pp; int error; log_debugx("obtaining configuration from %s", path); conf = conf_new(); TAILQ_FOREACH(pp, &oldconf->conf_pports, pp_next) pport_copy(pp, conf); ag = auth_group_new(conf, "default"); assert(ag != NULL); ag = auth_group_new(conf, "no-authentication"); assert(ag != NULL); ag->ag_type = AG_TYPE_NO_AUTHENTICATION; ag = auth_group_new(conf, "no-access"); assert(ag != NULL); ag->ag_type = AG_TYPE_DENY; pg = portal_group_new(conf, "default"); assert(pg != NULL); if (ucl) error = uclparse_conf(conf, path); else error = parse_conf(conf, path); if (error != 0) { conf_delete(conf); return (NULL); } check_perms(path); if (conf->conf_default_ag_defined == false) { log_debugx("auth-group \"default\" not defined; " "going with defaults"); ag = auth_group_find(conf, "default"); assert(ag != NULL); ag->ag_type = AG_TYPE_DENY; } if (conf->conf_default_pg_defined == false) { log_debugx("portal-group \"default\" not defined; " "going with defaults"); pg = portal_group_find(conf, "default"); assert(pg != NULL); portal_group_add_listen(pg, "0.0.0.0:3260", false); portal_group_add_listen(pg, "[::]:3260", false); } conf->conf_kernel_port_on = true; error = conf_verify(conf); if (error != 0) { conf_delete(conf); return (NULL); } return (conf); } int main(int argc, char **argv) { struct conf *oldconf, *newconf, *tmpconf; struct isns *newns; const char *config_path = DEFAULT_CONFIG_PATH; int debug = 0, ch, error; bool dont_daemonize = false; bool use_ucl = false; while ((ch = getopt(argc, argv, "duf:R")) != -1) { switch (ch) { case 'd': dont_daemonize = true; debug++; break; case 'u': use_ucl = true; break; case 'f': config_path = optarg; break; case 'R': #ifndef ICL_KERNEL_PROXY log_errx(1, "ctld(8) compiled without ICL_KERNEL_PROXY " "does not support iSER protocol"); #endif proxy_mode = true; break; case '?': default: usage(); } } argc -= optind; if (argc != 0) usage(); log_init(debug); kernel_init(); oldconf = conf_new_from_kernel(); newconf = conf_new_from_file(config_path, oldconf, use_ucl); if (newconf == NULL) log_errx(1, "configuration error; exiting"); if (debug > 0) { oldconf->conf_debug = debug; newconf->conf_debug = debug; } error = conf_apply(oldconf, newconf); if (error != 0) log_errx(1, "failed to apply configuration; exiting"); conf_delete(oldconf); oldconf = NULL; register_signals(); if (dont_daemonize == false) { log_debugx("daemonizing"); if (daemon(0, 0) == -1) { log_warn("cannot daemonize"); pidfile_remove(newconf->conf_pidfh); exit(1); } } /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) set_timeout((newconf->conf_isns_period + 2) / 3, false); for (;;) { main_loop(newconf, dont_daemonize); if (sighup_received) { sighup_received = false; log_debugx("received SIGHUP, reloading configuration"); tmpconf = conf_new_from_file(config_path, newconf, use_ucl); if (tmpconf == NULL) { log_warnx("configuration error, " "continuing with old configuration"); } else { if (debug > 0) tmpconf->conf_debug = debug; oldconf = newconf; newconf = tmpconf; error = conf_apply(oldconf, newconf); if (error != 0) log_warnx("failed to reload " "configuration"); conf_delete(oldconf); oldconf = NULL; } } else if (sigterm_received) { log_debugx("exiting on signal; " "reloading empty configuration"); log_debugx("removing CTL iSCSI ports " "and terminating all connections"); oldconf = newconf; newconf = conf_new(); if 
(debug > 0) newconf->conf_debug = debug; error = conf_apply(oldconf, newconf); if (error != 0) log_warnx("failed to apply configuration"); conf_delete(oldconf); oldconf = NULL; log_warnx("exiting on signal"); exit(0); } else { nchildren -= wait_for_children(false); assert(nchildren >= 0); if (timed_out()) { set_timeout(0, false); TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) isns_check(newns); /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) { set_timeout((newconf->conf_isns_period + 2) / 3, false); } } } } /* NOTREACHED */ } Index: projects/clang391-import/usr.sbin/syslogd/syslogd.c =================================================================== --- projects/clang391-import/usr.sbin/syslogd/syslogd.c (revision 309262) +++ projects/clang391-import/usr.sbin/syslogd/syslogd.c (revision 309263) @@ -1,2923 +1,2923 @@ /* * Copyright (c) 1983, 1988, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1983, 1988, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)syslogd.c 8.3 (Berkeley) 4/4/94"; #endif #endif /* not lint */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); /* * syslogd -- log system messages * * This program implements a system log. It takes a series of lines. * Each line may have a priority, signified as "<n>" as * the first characters of the line. If this is * not present, a default priority is used. * * To kill syslogd, send a signal 15 (terminate). A signal 1 (hup) will * cause it to reread its configuration file. * * Defined Constants: * * MAXLINE -- the maximum line length that can be handled. * DEFUPRI -- the default priority for user messages * DEFSPRI -- the default priority for kernel messages * * Author: Eric Allman * extensive changes by Ralph Campbell * more extensive changes by Eric Allman (again) * Extension to log by program name as well as facility and priority * by Peter da Silva. * -u and -v by Harlan Stenn. 
* Priority comparison code by Harlan Stenn. */ #define MAXLINE 1024 /* maximum line length */ #define MAXSVLINE MAXLINE /* maximum saved line length */ #define DEFUPRI (LOG_USER|LOG_NOTICE) #define DEFSPRI (LOG_KERN|LOG_CRIT) #define TIMERINTVL 30 /* interval for checking flush, mark */ #define TTYMSGTIME 1 /* timeout passed to ttymsg */ #define RCVBUF_MINSIZE (80 * 1024) /* minimum size of dgram rcv buffer */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pathnames.h" #include "ttymsg.h" #define SYSLOG_NAMES #include const char *ConfFile = _PATH_LOGCONF; const char *PidFile = _PATH_LOGPID; const char ctty[] = _PATH_CONSOLE; static const char include_str[] = "include"; static const char include_ext[] = ".conf"; #define dprintf if (Debug) printf #define MAXUNAMES 20 /* maximum number of user names */ /* * List of hosts for binding. */ static STAILQ_HEAD(, host) hqueue; struct host { char *name; STAILQ_ENTRY(host) next; }; /* * Unix sockets. * We have two default sockets, one with 666 permissions, * and one for privileged programs. */ struct funix { int s; const char *name; mode_t mode; STAILQ_ENTRY(funix) next; }; struct funix funix_secure = { -1, _PATH_LOG_PRIV, S_IRUSR | S_IWUSR, { NULL } }; struct funix funix_default = { -1, _PATH_LOG, DEFFILEMODE, { &funix_secure } }; STAILQ_HEAD(, funix) funixes = { &funix_default, &(funix_secure.next.stqe_next) }; /* * Flags to logmsg(). */ #define IGN_CONS 0x001 /* don't print on console */ #define SYNC_FILE 0x002 /* do fsync on file after printing */ #define ADDDATE 0x004 /* add a date to the message */ #define MARK 0x008 /* this message is a mark */ #define ISKERNEL 0x010 /* kernel generated message */ /* * This structure represents the files that will have log * copies printed. * We require f_file to be valid if f_type is F_FILE, F_CONSOLE, F_TTY * or if f_type is F_PIPE and f_pid > 0. */ struct filed { struct filed *f_next; /* next in linked list */ short f_type; /* entry type, see below */ short f_file; /* file descriptor */ time_t f_time; /* time this was last written */ char *f_host; /* host from which to recd. */ u_char f_pmask[LOG_NFACILITIES+1]; /* priority mask */ u_char f_pcmp[LOG_NFACILITIES+1]; /* compare priority */ #define PRI_LT 0x1 #define PRI_EQ 0x2 #define PRI_GT 0x4 char *f_program; /* program this applies to */ union { char f_uname[MAXUNAMES][MAXLOGNAME]; struct { char f_hname[MAXHOSTNAMELEN]; struct addrinfo *f_addr; } f_forw; /* forwarding address */ char f_fname[MAXPATHLEN]; struct { char f_pname[MAXPATHLEN]; pid_t f_pid; } f_pipe; } f_un; char f_prevline[MAXSVLINE]; /* last message logged */ char f_lasttime[16]; /* time of last occurrence */ char f_prevhost[MAXHOSTNAMELEN]; /* host from which recd. */ int f_prevpri; /* pri of f_prevline */ int f_prevlen; /* length of f_prevline */ int f_prevcount; /* repetition cnt of prevline */ u_int f_repeatcount; /* number of "repeated" msgs */ int f_flags; /* file-specific flags */ #define FFLAG_SYNC 0x01 #define FFLAG_NEEDSYNC 0x02 }; /* * Queue of about-to-be dead processes we should watch out for. */ TAILQ_HEAD(stailhead, deadq_entry) deadq_head; struct stailhead *deadq_headp; struct deadq_entry { pid_t dq_pid; int dq_timeout; TAILQ_ENTRY(deadq_entry) dq_entries; }; /* * The timeout to apply to processes waiting on the dead queue. 
Unit * of measure is `mark intervals', i.e. 20 minutes by default. * Processes on the dead queue will be terminated after that time. */ #define DQ_TIMO_INIT 2 typedef struct deadq_entry *dq_t; /* * Struct to hold records of network addresses that are allowed to log * to us. */ struct allowedpeer { int isnumeric; u_short port; union { struct { struct sockaddr_storage addr; struct sockaddr_storage mask; } numeric; char *name; } u; #define a_addr u.numeric.addr #define a_mask u.numeric.mask #define a_name u.name }; /* * Intervals at which we flush out "message repeated" messages, * in seconds after previous message is logged. After each flush, * we move to the next interval until we reach the largest. */ int repeatinterval[] = { 30, 120, 600 }; /* # of secs before flush */ #define MAXREPEAT ((sizeof(repeatinterval) / sizeof(repeatinterval[0])) - 1) #define REPEATTIME(f) ((f)->f_time + repeatinterval[(f)->f_repeatcount]) #define BACKOFF(f) { if (++(f)->f_repeatcount > MAXREPEAT) \ (f)->f_repeatcount = MAXREPEAT; \ } /* values for f_type */ #define F_UNUSED 0 /* unused entry */ #define F_FILE 1 /* regular file */ #define F_TTY 2 /* terminal */ #define F_CONSOLE 3 /* console terminal */ #define F_FORW 4 /* remote machine */ #define F_USERS 5 /* list of users */ #define F_WALL 6 /* everyone logged on */ #define F_PIPE 7 /* pipe to program */ const char *TypeNames[8] = { "UNUSED", "FILE", "TTY", "CONSOLE", "FORW", "USERS", "WALL", "PIPE" }; static struct filed *Files; /* Log files that we write to */ static struct filed consfile; /* Console */ static int Debug; /* debug flag */ static int Foreground = 0; /* Run in foreground, instead of daemonizing */ static int resolve = 1; /* resolve hostname */ static char LocalHostName[MAXHOSTNAMELEN]; /* our hostname */ static const char *LocalDomain; /* our local domain name */ static int *finet; /* Internet datagram sockets */ static int fklog = -1; /* /dev/klog */ static int Initialized; /* set when we have initialized ourselves */ static int MarkInterval = 20 * 60; /* interval between marks in seconds */ static int MarkSeq; /* mark sequence number */ static int NoBind; /* don't bind() as suggested by RFC 3164 */ static int SecureMode; /* when true, receive only unix domain socks */ #ifdef INET6 static int family = PF_UNSPEC; /* protocol family (IPv4, IPv6 or both) */ #else static int family = PF_INET; /* protocol family (IPv4 only) */ #endif static int mask_C1 = 1; /* mask characters from 0x80 - 0x9F */ static int send_to_all; /* send message to all IPv4/IPv6 addresses */ static int use_bootfile; /* log entire bootfile for every kern msg */ static int no_compress; /* don't compress messages (1=pipes, 2=all) */ static int logflags = O_WRONLY|O_APPEND; /* flags used to open log files */ static char bootfile[MAXLINE+1]; /* booted kernel file */ struct allowedpeer *AllowedPeers; /* List of allowed peers */ static int NumAllowed; /* Number of entries in AllowedPeers */ static int RemoteAddDate; /* Always set the date on remote messages */ static int UniquePriority; /* Only log specified priority? */ static int LogFacPri; /* Put facility and priority in log message: */ /* 0=no, 1=numeric, 2=names */ static int KeepKernFac; /* Keep remotely logged kernel facility */ static int needdofsync = 0; /* Are any file(s) waiting to be fsynced? 
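 * When set, the main select() loop polls with a zero timeout and calls dofsync() as soon as it finds no pending input.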
*/ static struct pidfh *pfh; volatile sig_atomic_t MarkSet, WantDie; static int allowaddr(char *); static void cfline(const char *, struct filed *, const char *, const char *); static const char *cvthname(struct sockaddr *); static void deadq_enter(pid_t, const char *); static int deadq_remove(pid_t); static int decode(const char *, const CODE *); static void die(int) __dead2; static void dodie(int); static void dofsync(void); static void domark(int); static void fprintlog(struct filed *, int, const char *); static int *socksetup(int, char *); static void init(int); static void logerror(const char *); static void logmsg(int, const char *, const char *, int); static void log_deadchild(pid_t, int, const char *); static void markit(void); static int skip_message(const char *, const char *, int); static void printline(const char *, char *, int); static void printsys(char *); static int p_open(const char *, pid_t *); static void readklog(void); static void reapchild(int); static const char *ttymsg_check(struct iovec *, int, char *, int); static void usage(void); static int validate(struct sockaddr *, const char *); static void unmapped(struct sockaddr *); static void wallmsg(struct filed *, struct iovec *, const int iovlen); static int waitdaemon(int, int, int); static void timedout(int); static void increase_rcvbuf(int); static void close_filed(struct filed *f) { if (f == NULL || f->f_file == -1) return; (void)close(f->f_file); f->f_file = -1; f->f_type = F_UNUSED; } int main(int argc, char *argv[]) { int ch, i, fdsrmax = 0, l; struct sockaddr_un sunx, fromunix; struct sockaddr_storage frominet; fd_set *fdsr = NULL; char line[MAXLINE + 1]; const char *hname; struct timeval tv, *tvp; struct sigaction sact; struct host *host; struct funix *fx, *fx1; sigset_t mask; pid_t ppid = 1, spid; socklen_t len; if (madvise(NULL, 0, MADV_PROTECT) != 0) dprintf("madvise() failed: %s\n", strerror(errno)); STAILQ_INIT(&hqueue); while ((ch = getopt(argc, argv, "468Aa:b:cCdf:Fkl:m:nNop:P:sS:Tuv")) != -1) switch (ch) { case '4': family = PF_INET; break; #ifdef INET6 case '6': family = PF_INET6; break; #endif case '8': mask_C1 = 0; break; case 'A': send_to_all++; break; case 'a': /* allow specific network addresses only */ if (allowaddr(optarg) == -1) usage(); break; case 'b': { if ((host = malloc(sizeof(struct host))) == NULL) err(1, "malloc failed"); host->name = optarg; STAILQ_INSERT_TAIL(&hqueue, host, next); break; } case 'c': no_compress++; break; case 'C': logflags |= O_CREAT; break; case 'd': /* debug */ Debug++; break; case 'f': /* configuration file */ ConfFile = optarg; break; case 'F': /* run in foreground instead of daemon */ Foreground++; break; case 'k': /* keep remote kern fac */ KeepKernFac = 1; break; case 'l': { long perml; mode_t mode; char *name, *ep; if (optarg[0] == '/') { mode = DEFFILEMODE; name = optarg; } else if ((name = strchr(optarg, ':')) != NULL) { *name++ = '\0'; if (name[0] != '/') errx(1, "socket name must be absolute " "path"); if (isdigit(*optarg)) { perml = strtol(optarg, &ep, 8); if (*ep || perml < 0 || perml & ~(S_IRWXU|S_IRWXG|S_IRWXO)) errx(1, "invalid mode %s, exiting", optarg); mode = (mode_t )perml; } else errx(1, "invalid mode %s, exiting", optarg); } else /* doesn't begin with '/', and no ':' */ errx(1, "can't parse path %s", optarg); if (strlen(name) >= sizeof(sunx.sun_path)) errx(1, "%s path too long, exiting", name); if ((fx = malloc(sizeof(struct funix))) == NULL) err(1, "malloc failed"); fx->s = -1; fx->name = name; fx->mode = mode; STAILQ_INSERT_TAIL(&funixes, 
fx, next); break; } case 'm': /* mark interval */ MarkInterval = atoi(optarg) * 60; break; case 'N': NoBind = 1; SecureMode = 1; break; case 'n': resolve = 0; break; case 'o': use_bootfile = 1; break; case 'p': /* path */ if (strlen(optarg) >= sizeof(sunx.sun_path)) errx(1, "%s path too long, exiting", optarg); funix_default.name = optarg; break; case 'P': /* path for alt. PID */ PidFile = optarg; break; case 's': /* no network mode */ SecureMode++; break; case 'S': /* path for privileged originator */ if (strlen(optarg) >= sizeof(sunx.sun_path)) errx(1, "%s path too long, exiting", optarg); funix_secure.name = optarg; break; case 'T': RemoteAddDate = 1; break; case 'u': /* only log specified priority */ UniquePriority++; break; case 'v': /* log facility and priority */ LogFacPri++; break; default: usage(); } if ((argc -= optind) != 0) usage(); pfh = pidfile_open(PidFile, 0600, &spid); if (pfh == NULL) { if (errno == EEXIST) errx(1, "syslogd already running, pid: %d", spid); warn("cannot open pid file"); } if ((!Foreground) && (!Debug)) { ppid = waitdaemon(0, 0, 30); if (ppid < 0) { warn("could not become daemon"); pidfile_remove(pfh); exit(1); } } else if (Debug) { setlinebuf(stdout); } if (NumAllowed) endservent(); consfile.f_type = F_CONSOLE; (void)strlcpy(consfile.f_un.f_fname, ctty + sizeof _PATH_DEV - 1, sizeof(consfile.f_un.f_fname)); (void)strlcpy(bootfile, getbootfile(), sizeof(bootfile)); (void)signal(SIGTERM, dodie); (void)signal(SIGINT, Debug ? dodie : SIG_IGN); (void)signal(SIGQUIT, Debug ? dodie : SIG_IGN); /* * We don't want the SIGCHLD and SIGHUP handlers to interfere * with each other; they are likely candidates for being called * simultaneously (SIGHUP closes pipe descriptor, process dies, * SIGCHLD happens). */ sigemptyset(&mask); sigaddset(&mask, SIGHUP); sact.sa_handler = reapchild; sact.sa_mask = mask; sact.sa_flags = SA_RESTART; (void)sigaction(SIGCHLD, &sact, NULL); (void)signal(SIGALRM, domark); (void)signal(SIGPIPE, SIG_IGN); /* We'll catch EPIPE instead. 
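 * With SIGPIPE ignored, a write to a pipe whose reader has exited fails with EPIPE rather than killing the daemon, and the filed entry can then be cleaned up.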
*/ (void)alarm(TIMERINTVL); TAILQ_INIT(&deadq_head); #ifndef SUN_LEN #define SUN_LEN(unp) (strlen((unp)->sun_path) + 2) #endif STAILQ_FOREACH_SAFE(fx, &funixes, next, fx1) { (void)unlink(fx->name); memset(&sunx, 0, sizeof(sunx)); sunx.sun_family = AF_LOCAL; (void)strlcpy(sunx.sun_path, fx->name, sizeof(sunx.sun_path)); fx->s = socket(PF_LOCAL, SOCK_DGRAM, 0); if (fx->s < 0 || bind(fx->s, (struct sockaddr *)&sunx, SUN_LEN(&sunx)) < 0 || chmod(fx->name, fx->mode) < 0) { (void)snprintf(line, sizeof line, "cannot create %s", fx->name); logerror(line); dprintf("cannot create %s (%d)\n", fx->name, errno); if (fx == &funix_default || fx == &funix_secure) die(0); else { STAILQ_REMOVE(&funixes, fx, funix, next); continue; } } increase_rcvbuf(fx->s); } if (SecureMode <= 1) { if (STAILQ_EMPTY(&hqueue)) finet = socksetup(family, NULL); STAILQ_FOREACH(host, &hqueue, next) { int *finet0, total; finet0 = socksetup(family, host->name); if (finet0 && !finet) { finet = finet0; } else if (finet0 && finet) { total = *finet0 + *finet + 1; finet = realloc(finet, total * sizeof(int)); if (finet == NULL) err(1, "realloc failed"); for (i = 1; i <= *finet0; i++) { finet[(*finet)+i] = finet0[i]; } *finet = total - 1; free(finet0); } } } if (finet) { if (SecureMode) { for (i = 0; i < *finet; i++) { if (shutdown(finet[i+1], SHUT_RD) < 0 && errno != ENOTCONN) { logerror("shutdown"); if (!Debug) die(0); } } } else { dprintf("listening on inet and/or inet6 socket\n"); } dprintf("sending on inet and/or inet6 socket\n"); } if ((fklog = open(_PATH_KLOG, O_RDONLY|O_NONBLOCK, 0)) < 0) dprintf("can't open %s (%d)\n", _PATH_KLOG, errno); /* tuck my process id away */ pidfile_write(pfh); dprintf("off & running....\n"); init(0); /* prevent SIGHUP and SIGCHLD handlers from running in parallel */ sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sact.sa_handler = init; sact.sa_mask = mask; sact.sa_flags = SA_RESTART; (void)sigaction(SIGHUP, &sact, NULL); tvp = &tv; tv.tv_sec = tv.tv_usec = 0; if (fklog != -1 && fklog > fdsrmax) fdsrmax = fklog; if (finet && !SecureMode) { for (i = 0; i < *finet; i++) { if (finet[i+1] != -1 && finet[i+1] > fdsrmax) fdsrmax = finet[i+1]; } } STAILQ_FOREACH(fx, &funixes, next) if (fx->s > fdsrmax) fdsrmax = fx->s; fdsr = (fd_set *)calloc(howmany(fdsrmax+1, NFDBITS), sizeof(fd_mask)); if (fdsr == NULL) errx(1, "calloc fd_set"); for (;;) { if (MarkSet) markit(); if (WantDie) die(WantDie); bzero(fdsr, howmany(fdsrmax+1, NFDBITS) * sizeof(fd_mask)); if (fklog != -1) FD_SET(fklog, fdsr); if (finet && !SecureMode) { for (i = 0; i < *finet; i++) { if (finet[i+1] != -1) FD_SET(finet[i+1], fdsr); } } STAILQ_FOREACH(fx, &funixes, next) FD_SET(fx->s, fdsr); i = select(fdsrmax+1, fdsr, NULL, NULL, needdofsync ? &tv : tvp); switch (i) { case 0: dofsync(); needdofsync = 0; if (tvp) { tvp = NULL; if (ppid != 1) kill(ppid, SIGALRM); } continue; case -1: if (errno != EINTR) logerror("select"); continue; } if (fklog != -1 && FD_ISSET(fklog, fdsr)) readklog(); if (finet && !SecureMode) { for (i = 0; i < *finet; i++) { if (FD_ISSET(finet[i+1], fdsr)) { len = sizeof(frominet); l = recvfrom(finet[i+1], line, MAXLINE, 0, (struct sockaddr *)&frominet, &len); if (l > 0) { line[l] = '\0'; hname = cvthname((struct sockaddr *)&frominet); unmapped((struct sockaddr *)&frominet); if (validate((struct sockaddr *)&frominet, hname)) printline(hname, line, RemoteAddDate ? 
ADDDATE : 0); } else if (l < 0 && errno != EINTR) logerror("recvfrom inet"); } } } STAILQ_FOREACH(fx, &funixes, next) { if (FD_ISSET(fx->s, fdsr)) { len = sizeof(fromunix); l = recvfrom(fx->s, line, MAXLINE, 0, (struct sockaddr *)&fromunix, &len); if (l > 0) { line[l] = '\0'; printline(LocalHostName, line, 0); } else if (l < 0 && errno != EINTR) logerror("recvfrom unix"); } } } if (fdsr) free(fdsr); } static void unmapped(struct sockaddr *sa) { struct sockaddr_in6 *sin6; struct sockaddr_in sin4; if (sa->sa_family != AF_INET6) return; if (sa->sa_len != sizeof(struct sockaddr_in6) || sizeof(sin4) > sa->sa_len) return; sin6 = (struct sockaddr_in6 *)sa; if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) return; memset(&sin4, 0, sizeof(sin4)); sin4.sin_family = AF_INET; sin4.sin_len = sizeof(struct sockaddr_in); memcpy(&sin4.sin_addr, &sin6->sin6_addr.s6_addr[12], sizeof(sin4.sin_addr)); sin4.sin_port = sin6->sin6_port; memcpy(sa, &sin4, sin4.sin_len); } static void usage(void) { fprintf(stderr, "%s\n%s\n%s\n%s\n", "usage: syslogd [-468ACcdFknosTuv] [-a allowed_peer]", " [-b bind_address] [-f config_file]", " [-l [mode:]path] [-m mark_interval]", " [-P pid_file] [-p log_socket]"); exit(1); } /* * Take a raw input line, decode the message, and print the message * on the appropriate log files. */ static void printline(const char *hname, char *msg, int flags) { char *p, *q; long n; int c, pri; char line[MAXLINE + 1]; /* test for special codes */ p = msg; pri = DEFUPRI; if (*p == '<') { errno = 0; n = strtol(p + 1, &q, 10); if (*q == '>' && n >= 0 && n < INT_MAX && errno == 0) { p = q + 1; pri = n; } } if (pri &~ (LOG_FACMASK|LOG_PRIMASK)) pri = DEFUPRI; /* * Don't allow users to log kernel messages. * NOTE: since LOG_KERN == 0 this will also match * messages with no facility specified. */ if ((pri & LOG_FACMASK) == LOG_KERN && !KeepKernFac) pri = LOG_MAKEPRI(LOG_USER, LOG_PRI(pri)); q = line; while ((c = (unsigned char)*p++) != '\0' && q < &line[sizeof(line) - 4]) { if (mask_C1 && (c & 0x80) && c < 0xA0) { c &= 0x7F; *q++ = 'M'; *q++ = '-'; } if (isascii(c) && iscntrl(c)) { if (c == '\n') { *q++ = ' '; } else if (c == '\t') { *q++ = '\t'; } else { *q++ = '^'; *q++ = c ^ 0100; } } else { *q++ = c; } } *q = '\0'; logmsg(pri, line, hname, flags); } /* * Read /dev/klog while data are available, split into lines. */ static void readklog(void) { char *p, *q, line[MAXLINE + 1]; int len, i; len = 0; for (;;) { i = read(fklog, line + len, MAXLINE - 1 - len); if (i > 0) { line[i + len] = '\0'; } else { if (i < 0 && errno != EINTR && errno != EAGAIN) { logerror("klog"); fklog = -1; } break; } for (p = line; (q = strchr(p, '\n')) != NULL; p = q + 1) { *q = '\0'; printsys(p); } len = strlen(p); if (len >= MAXLINE - 1) { printsys(p); len = 0; } if (len > 0) memmove(line, p, len + 1); } if (len > 0) printsys(line); } /* * Take a raw input line from /dev/klog, format similar to syslog(). */ static void printsys(char *msg) { char *p, *q; long n; int flags, isprintf, pri; flags = ISKERNEL | SYNC_FILE | ADDDATE; /* fsync after write */ p = msg; pri = DEFSPRI; isprintf = 1; if (*p == '<') { errno = 0; n = strtol(p + 1, &q, 10); if (*q == '>' && n >= 0 && n < INT_MAX && errno == 0) { p = q + 1; pri = n; isprintf = 0; } } /* * Kernel printf's and LOG_CONSOLE messages have been displayed * on the console already. 
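 *
 * The "<N>" prefix decoded above uses the standard syslog(3)
 * encoding, pri = (facility << 3) | severity.  A hypothetical
 * sketch of splitting such a value back apart:
 */
#if 0
	int pri = 26;			/* as carried in a "<26>" prefix */
	int fac = LOG_FAC(pri);		/* 3 == daemon */
	int sev = LOG_PRI(pri);		/* 2 == crit */
#endif
	/* end of illustrative sketch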
*/ if (isprintf || (pri & LOG_FACMASK) == LOG_CONSOLE) flags |= IGN_CONS; if (pri &~ (LOG_FACMASK|LOG_PRIMASK)) pri = DEFSPRI; logmsg(pri, p, LocalHostName, flags); } static time_t now; /* * Match a program or host name against a specification. * Return a non-0 value if the message must be ignored * based on the specification. */ static int skip_message(const char *name, const char *spec, int checkcase) { const char *s; char prev, next; int exclude = 0; /* Behaviour on explicit match */ if (spec == NULL) return 0; switch (*spec) { case '-': exclude = 1; /*FALLTHROUGH*/ case '+': spec++; break; default: break; } if (checkcase) s = strstr (spec, name); else s = strcasestr (spec, name); if (s != NULL) { prev = (s == spec ? ',' : *(s - 1)); next = *(s + strlen (name)); if (prev == ',' && (next == '\0' || next == ',')) /* Explicit match: skip iff the spec is an exclusive one. */ return exclude; } /* No explicit match for this name: skip the message iff the spec is an inclusive one. */ return !exclude; } /* * Log a message to the appropriate log files, users, etc. based on * the priority. */ static void logmsg(int pri, const char *msg, const char *from, int flags) { struct filed *f; int i, fac, msglen, omask, prilev; const char *timestamp; char prog[NAME_MAX+1]; char buf[MAXLINE+1]; dprintf("logmsg: pri %o, flags %x, from %s, msg %s\n", pri, flags, from, msg); omask = sigblock(sigmask(SIGHUP)|sigmask(SIGALRM)); /* * Check to see if msg looks non-standard. */ msglen = strlen(msg); if (msglen < 16 || msg[3] != ' ' || msg[6] != ' ' || msg[9] != ':' || msg[12] != ':' || msg[15] != ' ') flags |= ADDDATE; (void)time(&now); if (flags & ADDDATE) { timestamp = ctime(&now) + 4; } else { timestamp = msg; msg += 16; msglen -= 16; } /* skip leading blanks */ while (isspace(*msg)) { msg++; msglen--; } /* extract facility and priority level */ if (flags & MARK) fac = LOG_NFACILITIES; else fac = LOG_FAC(pri); /* Check maximum facility number. */ if (fac > LOG_NFACILITIES) { (void)sigsetmask(omask); return; } prilev = LOG_PRI(pri); /* extract program name */ for (i = 0; i < NAME_MAX; i++) { if (!isprint(msg[i]) || msg[i] == ':' || msg[i] == '[' || msg[i] == '/' || isspace(msg[i])) break; prog[i] = msg[i]; } prog[i] = 0; /* add kernel prefix for kernel messages */ if (flags & ISKERNEL) { snprintf(buf, sizeof(buf), "%s: %s", use_bootfile ? bootfile : "kernel", msg); msg = buf; msglen = strlen(buf); } /* log the message to the particular outputs */ if (!Initialized) { f = &consfile; /* * Open in non-blocking mode to avoid hangs during open * and close(waiting for the port to drain). 
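 *
 * On a serial console a plain open(2) may block waiting for carrier,
 * so the descriptor is opened O_NONBLOCK and discarded right after
 * use.  A hypothetical sketch of the pattern (msg is a stand-in):
 */
#if 0
	int fd = open(_PATH_CONSOLE, O_WRONLY | O_NONBLOCK, 0);
	if (fd >= 0) {
		(void)write(fd, msg, strlen(msg));
		(void)close(fd);
	}
#endif
	/* end of illustrative sketch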
*/ f->f_file = open(ctty, O_WRONLY | O_NONBLOCK, 0); if (f->f_file >= 0) { (void)strlcpy(f->f_lasttime, timestamp, sizeof(f->f_lasttime)); fprintlog(f, flags, msg); close(f->f_file); f->f_file = -1; } (void)sigsetmask(omask); return; } for (f = Files; f; f = f->f_next) { /* skip messages that are incorrect priority */ if (!(((f->f_pcmp[fac] & PRI_EQ) && (f->f_pmask[fac] == prilev)) ||((f->f_pcmp[fac] & PRI_LT) && (f->f_pmask[fac] < prilev)) ||((f->f_pcmp[fac] & PRI_GT) && (f->f_pmask[fac] > prilev)) ) || f->f_pmask[fac] == INTERNAL_NOPRI) continue; /* skip messages with the incorrect hostname */ if (skip_message(from, f->f_host, 0)) continue; /* skip messages with the incorrect program name */ if (skip_message(prog, f->f_program, 1)) continue; /* skip message to console if it has already been printed */ if (f->f_type == F_CONSOLE && (flags & IGN_CONS)) continue; /* don't output marks to recently written files */ if ((flags & MARK) && (now - f->f_time) < MarkInterval / 2) continue; /* * suppress duplicate lines to this file */ if (no_compress - (f->f_type != F_PIPE) < 1 && (flags & MARK) == 0 && msglen == f->f_prevlen && !strcmp(msg, f->f_prevline) && !strcasecmp(from, f->f_prevhost)) { (void)strlcpy(f->f_lasttime, timestamp, sizeof(f->f_lasttime)); f->f_prevcount++; dprintf("msg repeated %d times, %ld sec of %d\n", f->f_prevcount, (long)(now - f->f_time), repeatinterval[f->f_repeatcount]); /* * If domark would have logged this by now, * flush it now (so we don't hold isolated messages), * but back off so we'll flush less often * in the future. */ if (now > REPEATTIME(f)) { fprintlog(f, flags, (char *)NULL); BACKOFF(f); } } else { /* new line, save it */ if (f->f_prevcount) fprintlog(f, 0, (char *)NULL); f->f_repeatcount = 0; f->f_prevpri = pri; (void)strlcpy(f->f_lasttime, timestamp, sizeof(f->f_lasttime)); (void)strlcpy(f->f_prevhost, from, sizeof(f->f_prevhost)); if (msglen < MAXSVLINE) { f->f_prevlen = msglen; (void)strlcpy(f->f_prevline, msg, sizeof(f->f_prevline)); fprintlog(f, flags, (char *)NULL); } else { f->f_prevline[0] = 0; f->f_prevlen = 0; fprintlog(f, flags, msg); } } } (void)sigsetmask(omask); } static void dofsync(void) { struct filed *f; for (f = Files; f; f = f->f_next) { if ((f->f_type == F_FILE) && (f->f_flags & FFLAG_NEEDSYNC)) { f->f_flags &= ~FFLAG_NEEDSYNC; (void)fsync(f->f_file); } } } #define IOV_SIZE 7 static void fprintlog(struct filed *f, int flags, const char *msg) { struct iovec iov[IOV_SIZE]; struct iovec *v; struct addrinfo *r; int i, l, lsent = 0; char line[MAXLINE + 1], repbuf[80], greetings[200], *wmsg = NULL; char nul[] = "", space[] = " ", lf[] = "\n", crlf[] = "\r\n"; const char *msgret; v = iov; if (f->f_type == F_WALL) { v->iov_base = greetings; /* The time displayed is not synchronized with the other log * destinations (like messages). The following fragment previously used * ctime(&now), which updated the time every 30 sec. * With f_lasttime, time is synchronized correctly.
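 *
 * Note that snprintf(3) returns the length the formatted string
 * would have had, which may exceed the buffer, so iov_len is
 * clamped just below.  A hypothetical sketch of that clamp
 * (longtext is a stand-in):
 */
#if 0
	char buf[16];
	int n = snprintf(buf, sizeof buf, "%s", longtext);
	if (n >= (int)sizeof buf)
		n = sizeof buf - 1;	/* bytes actually stored */
#endif
	/* end of illustrative sketch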
*/ v->iov_len = snprintf(greetings, sizeof greetings, "\r\n\7Message from syslogd@%s at %.24s ...\r\n", f->f_prevhost, f->f_lasttime); if (v->iov_len >= sizeof greetings) v->iov_len = sizeof greetings - 1; v++; v->iov_base = nul; v->iov_len = 0; v++; } else { v->iov_base = f->f_lasttime; v->iov_len = strlen(f->f_lasttime); v++; v->iov_base = space; v->iov_len = 1; v++; } if (LogFacPri) { static char fp_buf[30]; /* Hollow laugh */ int fac = f->f_prevpri & LOG_FACMASK; int pri = LOG_PRI(f->f_prevpri); const char *f_s = NULL; char f_n[5]; /* Hollow laugh */ const char *p_s = NULL; char p_n[5]; /* Hollow laugh */ if (LogFacPri > 1) { const CODE *c; for (c = facilitynames; c->c_name; c++) { if (c->c_val == fac) { f_s = c->c_name; break; } } for (c = prioritynames; c->c_name; c++) { if (c->c_val == pri) { p_s = c->c_name; break; } } } if (!f_s) { snprintf(f_n, sizeof f_n, "%d", LOG_FAC(fac)); f_s = f_n; } if (!p_s) { snprintf(p_n, sizeof p_n, "%d", pri); p_s = p_n; } snprintf(fp_buf, sizeof fp_buf, "<%s.%s> ", f_s, p_s); v->iov_base = fp_buf; v->iov_len = strlen(fp_buf); } else { v->iov_base = nul; v->iov_len = 0; } v++; v->iov_base = f->f_prevhost; v->iov_len = strlen(v->iov_base); v++; v->iov_base = space; v->iov_len = 1; v++; if (msg) { wmsg = strdup(msg); /* XXX iov_base needs a `const' sibling. */ if (wmsg == NULL) { logerror("strdup"); exit(1); } v->iov_base = wmsg; v->iov_len = strlen(msg); } else if (f->f_prevcount > 1) { v->iov_base = repbuf; v->iov_len = snprintf(repbuf, sizeof repbuf, "last message repeated %d times", f->f_prevcount); } else { v->iov_base = f->f_prevline; v->iov_len = f->f_prevlen; } v++; dprintf("Logging to %s", TypeNames[f->f_type]); f->f_time = now; switch (f->f_type) { int port; case F_UNUSED: dprintf("\n"); break; case F_FORW: port = (int)ntohs(((struct sockaddr_in *) (f->f_un.f_forw.f_addr->ai_addr))->sin_port); if (port != 514) { dprintf(" %s:%d\n", f->f_un.f_forw.f_hname, port); } else { dprintf(" %s\n", f->f_un.f_forw.f_hname); } /* check for local vs remote messages */ if (strcasecmp(f->f_prevhost, LocalHostName)) l = snprintf(line, sizeof line - 1, "<%d>%.15s Forwarded from %s: %s", f->f_prevpri, (char *)iov[0].iov_base, f->f_prevhost, (char *)iov[5].iov_base); else l = snprintf(line, sizeof line - 1, "<%d>%.15s %s", f->f_prevpri, (char *)iov[0].iov_base, (char *)iov[5].iov_base); if (l < 0) l = 0; else if (l > MAXLINE) l = MAXLINE; if (finet) { for (r = f->f_un.f_forw.f_addr; r; r = r->ai_next) { for (i = 0; i < *finet; i++) { #if 0 /* * should we check AF first, or just * trial and error? FWD */ if (r->ai_family == address_family_of(finet[i+1])) #endif lsent = sendto(finet[i+1], line, l, 0, r->ai_addr, r->ai_addrlen); if (lsent == l) break; } if (lsent == l && !send_to_all) break; } dprintf("lsent/l: %d/%d\n", lsent, l); if (lsent != l) { int e = errno; logerror("sendto"); errno = e; switch (errno) { case ENOBUFS: case ENETDOWN: case ENETUNREACH: case EHOSTUNREACH: case EHOSTDOWN: case EADDRNOTAVAIL: break; /* case EBADF: */ /* case EACCES: */ /* case ENOTSOCK: */ /* case EFAULT: */ /* case EMSGSIZE: */ /* case EAGAIN: */ /* case ENOBUFS: */ /* case ECONNREFUSED: */ default: dprintf("removing entry: errno=%d\n", e); f->f_type = F_UNUSED; break; } } } break; case F_FILE: dprintf(" %s\n", f->f_un.f_fname); v->iov_base = lf; v->iov_len = 1; if (writev(f->f_file, iov, IOV_SIZE) < 0) { /* * If writev(2) fails for potentially transient errors * like the filesystem being full, ignore it. * Otherwise remove this logfile from the list. 
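 *
 * Each record is assembled as an iovec array (timestamp, host,
 * message, terminator) and committed with one gather write.  A
 * hypothetical sketch of the same writev(2) pattern:
 */
#if 0
	struct iovec v[2];
	char ts[] = "Dec  6 10:00:00 ", nl[] = "\n";
	v[0].iov_base = ts;	v[0].iov_len = sizeof ts - 1;
	v[1].iov_base = nl;	v[1].iov_len = 1;
	if (writev(fd, v, 2) < 0 && errno != ENOSPC)
		logerror("writev");	/* ENOSPC is treated as transient */
#endif
	/* end of illustrative sketch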
*/ if (errno != ENOSPC) { int e = errno; close_filed(f); errno = e; logerror(f->f_un.f_fname); } } else if ((flags & SYNC_FILE) && (f->f_flags & FFLAG_SYNC)) { f->f_flags |= FFLAG_NEEDSYNC; needdofsync = 1; } break; case F_PIPE: dprintf(" %s\n", f->f_un.f_pipe.f_pname); v->iov_base = lf; v->iov_len = 1; if (f->f_un.f_pipe.f_pid == 0) { if ((f->f_file = p_open(f->f_un.f_pipe.f_pname, &f->f_un.f_pipe.f_pid)) < 0) { f->f_type = F_UNUSED; logerror(f->f_un.f_pipe.f_pname); break; } } if (writev(f->f_file, iov, IOV_SIZE) < 0) { int e = errno; close_filed(f); if (f->f_un.f_pipe.f_pid > 0) deadq_enter(f->f_un.f_pipe.f_pid, f->f_un.f_pipe.f_pname); f->f_un.f_pipe.f_pid = 0; errno = e; logerror(f->f_un.f_pipe.f_pname); } break; case F_CONSOLE: if (flags & IGN_CONS) { dprintf(" (ignored)\n"); break; } /* FALLTHROUGH */ case F_TTY: dprintf(" %s%s\n", _PATH_DEV, f->f_un.f_fname); v->iov_base = crlf; v->iov_len = 2; errno = 0; /* ttymsg() only sometimes returns an errno */ if ((msgret = ttymsg(iov, IOV_SIZE, f->f_un.f_fname, 10))) { f->f_type = F_UNUSED; logerror(msgret); } break; case F_USERS: case F_WALL: dprintf("\n"); v->iov_base = crlf; v->iov_len = 2; wallmsg(f, iov, IOV_SIZE); break; } f->f_prevcount = 0; free(wmsg); } /* * WALLMSG -- Write a message to the world at large * * Write the specified message to either the entire * world, or a list of approved users. */ static void wallmsg(struct filed *f, struct iovec *iov, const int iovlen) { static int reenter; /* avoid calling ourselves */ struct utmpx *ut; int i; const char *p; if (reenter++) return; setutxent(); /* NOSTRICT */ while ((ut = getutxent()) != NULL) { if (ut->ut_type != USER_PROCESS) continue; if (f->f_type == F_WALL) { if ((p = ttymsg(iov, iovlen, ut->ut_line, TTYMSGTIME)) != NULL) { errno = 0; /* already in msg */ logerror(p); } continue; } /* should we send the message to this user? */ for (i = 0; i < MAXUNAMES; i++) { if (!f->f_un.f_uname[i][0]) break; if (!strcmp(f->f_un.f_uname[i], ut->ut_user)) { if ((p = ttymsg_check(iov, iovlen, ut->ut_line, TTYMSGTIME)) != NULL) { errno = 0; /* already in msg */ logerror(p); } break; } } } endutxent(); reenter = 0; } /* * Wrapper routine for ttymsg() that checks the terminal for messages enabled. */ static const char * ttymsg_check(struct iovec *iov, int iovcnt, char *line, int tmout) { static char device[1024]; static char errbuf[1024]; struct stat sb; (void) snprintf(device, sizeof(device), "%s%s", _PATH_DEV, line); if (stat(device, &sb) < 0) { (void) snprintf(errbuf, sizeof(errbuf), "%s: %s", device, strerror(errno)); return (errbuf); } if ((sb.st_mode & S_IWGRP) == 0) /* Messages disabled. */ return (NULL); return ttymsg(iov, iovcnt, line, tmout); } static void reapchild(int signo __unused) { int status; pid_t pid; struct filed *f; while ((pid = wait3(&status, WNOHANG, (struct rusage *)NULL)) > 0) { if (!Initialized) /* Don't tell while we are initting. */ continue; /* First, look if it's a process from the dead queue. */ if (deadq_remove(pid)) goto oncemore; /* Now, look in list of active processes. */ for (f = Files; f; f = f->f_next) if (f->f_type == F_PIPE && f->f_un.f_pipe.f_pid == pid) { close_filed(f); f->f_un.f_pipe.f_pid = 0; log_deadchild(pid, status, f->f_un.f_pipe.f_pname); break; } oncemore: continue; } } /* * Return a printable representation of a host address. 
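 *
 * The numeric form is obtained via getnameinfo(3) with
 * NI_NUMERICHOST; only when -n was not given is a second, resolving
 * lookup attempted under a blocked SIGHUP.  A hypothetical sketch of
 * the numeric conversion alone (sa is a stand-in):
 */
#if 0
	char ip[NI_MAXHOST];
	if (getnameinfo(sa, sa->sa_len, ip, sizeof ip, NULL, 0,
	    NI_NUMERICHOST) == 0)
		dprintf("peer address is %s\n", ip);
#endif
	/* end of illustrative sketch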
*/ static const char * cvthname(struct sockaddr *f) { int error, hl; sigset_t omask, nmask; static char hname[NI_MAXHOST], ip[NI_MAXHOST]; error = getnameinfo((struct sockaddr *)f, ((struct sockaddr *)f)->sa_len, ip, sizeof ip, NULL, 0, NI_NUMERICHOST); dprintf("cvthname(%s)\n", ip); if (error) { dprintf("Malformed from address %s\n", gai_strerror(error)); return ("???"); } if (!resolve) return (ip); sigemptyset(&nmask); sigaddset(&nmask, SIGHUP); sigprocmask(SIG_BLOCK, &nmask, &omask); error = getnameinfo((struct sockaddr *)f, ((struct sockaddr *)f)->sa_len, hname, sizeof hname, NULL, 0, NI_NAMEREQD); sigprocmask(SIG_SETMASK, &omask, NULL); if (error) { dprintf("Host name for your address (%s) unknown\n", ip); return (ip); } hl = strlen(hname); if (hl > 0 && hname[hl-1] == '.') hname[--hl] = '\0'; trimdomain(hname, hl); return (hname); } static void dodie(int signo) { WantDie = signo; } static void domark(int signo __unused) { MarkSet = 1; } /* * Print syslogd errors some place. */ static void logerror(const char *type) { char buf[512]; static int recursed = 0; /* If there's an error while trying to log an error, give up. */ if (recursed) return; recursed++; if (errno) (void)snprintf(buf, sizeof buf, "syslogd: %s: %s", type, strerror(errno)); else (void)snprintf(buf, sizeof buf, "syslogd: %s", type); errno = 0; dprintf("%s\n", buf); logmsg(LOG_SYSLOG|LOG_ERR, buf, LocalHostName, ADDDATE); recursed--; } static void die(int signo) { struct filed *f; struct funix *fx; int was_initialized; char buf[100]; was_initialized = Initialized; Initialized = 0; /* Don't log SIGCHLDs. */ for (f = Files; f != NULL; f = f->f_next) { /* flush any pending output */ if (f->f_prevcount) fprintlog(f, 0, (char *)NULL); if (f->f_type == F_PIPE && f->f_un.f_pipe.f_pid > 0) { close_filed(f); f->f_un.f_pipe.f_pid = 0; } } Initialized = was_initialized; if (signo) { dprintf("syslogd: exiting on signal %d\n", signo); (void)snprintf(buf, sizeof(buf), "exiting on signal %d", signo); errno = 0; logerror(buf); } STAILQ_FOREACH(fx, &funixes, next) (void)unlink(fx->name); pidfile_remove(pfh); exit(1); } static int configfiles(const struct dirent *dp) { const char *p; size_t ext_len; if (dp->d_name[0] == '.') return (0); ext_len = sizeof(include_ext) -1; if (dp->d_namlen <= ext_len) return (0); p = &dp->d_name[dp->d_namlen - ext_len]; if (strcmp(p, include_ext) != 0) return (0); return (1); } static void readconfigfile(FILE *cf, struct filed **nextp, int allow_includes) { FILE *cf2; struct filed *f; struct dirent **ent; char cline[LINE_MAX]; char host[MAXHOSTNAMELEN]; char prog[LINE_MAX]; char file[MAXPATHLEN]; char *p, *tmp; int i, nents; size_t include_len; /* * Foreach line in the conf table, open that file. */ f = NULL; include_len = sizeof(include_str) -1; (void)strlcpy(host, "*", sizeof(host)); (void)strlcpy(prog, "*", sizeof(prog)); while (fgets(cline, sizeof(cline), cf) != NULL) { /* * check for end-of-section, comments, strip off trailing * spaces and newline character. #!prog is treated specially: * following lines apply only to that program. 
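 *
 * For example, a hypothetical syslog.conf fragment exercising the
 * program and host sections handled below:
 *
 *	!ppp
 *	*.*					/var/log/ppp.log
 *	+buildbox
 *	*.err					/var/log/buildbox.log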
*/ for (p = cline; isspace(*p); ++p) continue; if (*p == 0) continue; if (allow_includes && strncmp(p, include_str, include_len) == 0 && isspace(p[include_len])) { p += include_len; while (isspace(*p)) p++; tmp = p; while (*tmp != '\0' && !isspace(*tmp)) tmp++; *tmp = '\0'; dprintf("Trying to include files in '%s'\n", p); nents = scandir(p, &ent, configfiles, alphasort); if (nents == -1) { dprintf("Unable to open '%s': %s\n", p, strerror(errno)); continue; } for (i = 0; i < nents; i++) { if (snprintf(file, sizeof(file), "%s/%s", p, ent[i]->d_name) >= (int)sizeof(file)) { dprintf("ignoring path too long: " "'%s/%s'\n", p, ent[i]->d_name); free(ent[i]); continue; } free(ent[i]); cf2 = fopen(file, "r"); if (cf2 == NULL) continue; dprintf("reading %s\n", file); readconfigfile(cf2, nextp, 0); fclose(cf2); } free(ent); continue; } if (*p == '#') { p++; if (*p != '!' && *p != '+' && *p != '-') continue; } if (*p == '+' || *p == '-') { host[0] = *p++; while (isspace(*p)) p++; if ((!*p) || (*p == '*')) { (void)strlcpy(host, "*", sizeof(host)); continue; } if (*p == '@') p = LocalHostName; for (i = 1; i < MAXHOSTNAMELEN - 1; i++) { if (!isalnum(*p) && *p != '.' && *p != '-' && *p != ',' && *p != ':' && *p != '%') break; host[i] = *p++; } host[i] = '\0'; continue; } if (*p == '!') { p++; while (isspace(*p)) p++; if ((!*p) || (*p == '*')) { (void)strlcpy(prog, "*", sizeof(prog)); continue; } for (i = 0; i < LINE_MAX - 1; i++) { if (!isprint(p[i]) || isspace(p[i])) break; prog[i] = p[i]; } prog[i] = 0; continue; } for (p = cline + 1; *p != '\0'; p++) { if (*p != '#') continue; if (*(p - 1) == '\\') { strcpy(p - 1, p); p--; continue; } *p = '\0'; break; } for (i = strlen(cline) - 1; i >= 0 && isspace(cline[i]); i--) cline[i] = '\0'; f = (struct filed *)calloc(1, sizeof(*f)); if (f == NULL) { logerror("calloc"); exit(1); } *nextp = f; nextp = &f->f_next; cfline(cline, f, prog, host); } } /* * INIT -- Initialize syslogd from configuration table */ static void init(int signo) { int i; FILE *cf; struct filed *f, *next, **nextp; char *p; char oldLocalHostName[MAXHOSTNAMELEN]; char hostMsg[2*MAXHOSTNAMELEN+40]; char bootfileMsg[LINE_MAX]; dprintf("init\n"); /* * Load hostname (may have changed). */ if (signo != 0) (void)strlcpy(oldLocalHostName, LocalHostName, sizeof(oldLocalHostName)); if (gethostname(LocalHostName, sizeof(LocalHostName))) err(EX_OSERR, "gethostname() failed"); if ((p = strchr(LocalHostName, '.')) != NULL) { *p++ = '\0'; LocalDomain = p; } else { LocalDomain = ""; } /* * Load / reload timezone data (in case it changed). * * Just calling tzset() again does not work, the timezone code * caches the result. However, by setting the TZ variable, one * can defeat the caching and have the timezone code really * reload the timezone data. Respect any initial setting of * TZ, in case the system is configured specially. */ dprintf("loading timezone data via tzset()\n"); if (getenv("TZ")) { tzset(); } else { setenv("TZ", ":/etc/localtime", 1); tzset(); unsetenv("TZ"); } /* * Close all open log files. 
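 *
 * Each output is flushed before it is closed, so a suppressed
 * "last message repeated N times" line is not lost across a
 * reconfiguration.  A hypothetical sketch of the per-entry teardown
 * performed below:
 */
#if 0
	if (f->f_prevcount)
		fprintlog(f, 0, (char *)NULL);	/* emit pending repeats */
	close_filed(f);				/* then drop the descriptor */
#endif
	/* end of illustrative sketch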
*/ Initialized = 0; for (f = Files; f != NULL; f = next) { /* flush any pending output */ if (f->f_prevcount) fprintlog(f, 0, (char *)NULL); switch (f->f_type) { case F_FILE: case F_FORW: case F_CONSOLE: case F_TTY: close_filed(f); break; case F_PIPE: if (f->f_un.f_pipe.f_pid > 0) { close_filed(f); deadq_enter(f->f_un.f_pipe.f_pid, f->f_un.f_pipe.f_pname); } f->f_un.f_pipe.f_pid = 0; break; } next = f->f_next; if (f->f_program) free(f->f_program); if (f->f_host) free(f->f_host); free((char *)f); } Files = NULL; - *nextp = NULL; + nextp = &Files; /* open the configuration file */ if ((cf = fopen(ConfFile, "r")) == NULL) { dprintf("cannot open %s\n", ConfFile); *nextp = (struct filed *)calloc(1, sizeof(*f)); if (*nextp == NULL) { logerror("calloc"); exit(1); } cfline("*.ERR\t/dev/console", *nextp, "*", "*"); (*nextp)->f_next = (struct filed *)calloc(1, sizeof(*f)); if ((*nextp)->f_next == NULL) { logerror("calloc"); exit(1); } cfline("*.PANIC\t*", (*nextp)->f_next, "*", "*"); Initialized = 1; return; } readconfigfile(cf, &Files, 1); /* close the configuration file */ (void)fclose(cf); Initialized = 1; if (Debug) { int port; for (f = Files; f; f = f->f_next) { for (i = 0; i <= LOG_NFACILITIES; i++) if (f->f_pmask[i] == INTERNAL_NOPRI) printf("X "); else printf("%d ", f->f_pmask[i]); printf("%s: ", TypeNames[f->f_type]); switch (f->f_type) { case F_FILE: printf("%s", f->f_un.f_fname); break; case F_CONSOLE: case F_TTY: printf("%s%s", _PATH_DEV, f->f_un.f_fname); break; case F_FORW: port = (int)ntohs(((struct sockaddr_in *) (f->f_un.f_forw.f_addr->ai_addr))->sin_port); if (port != 514) { printf("%s:%d", f->f_un.f_forw.f_hname, port); } else { printf("%s", f->f_un.f_forw.f_hname); } break; case F_PIPE: printf("%s", f->f_un.f_pipe.f_pname); break; case F_USERS: for (i = 0; i < MAXUNAMES && *f->f_un.f_uname[i]; i++) printf("%s, ", f->f_un.f_uname[i]); break; } if (f->f_program) printf(" (%s)", f->f_program); printf("\n"); } } logmsg(LOG_SYSLOG|LOG_INFO, "syslogd: restart", LocalHostName, ADDDATE); dprintf("syslogd: restarted\n"); /* * Log a change in hostname, but only on a restart. */ if (signo != 0 && strcmp(oldLocalHostName, LocalHostName) != 0) { (void)snprintf(hostMsg, sizeof(hostMsg), "syslogd: hostname changed, \"%s\" to \"%s\"", oldLocalHostName, LocalHostName); logmsg(LOG_SYSLOG|LOG_INFO, hostMsg, LocalHostName, ADDDATE); dprintf("%s\n", hostMsg); } /* * Log the kernel boot file if we aren't going to use it as * the prefix, and if this is *not* a restart. 
*/ if (signo == 0 && !use_bootfile) { (void)snprintf(bootfileMsg, sizeof(bootfileMsg), "syslogd: kernel boot file is %s", bootfile); logmsg(LOG_KERN|LOG_INFO, bootfileMsg, LocalHostName, ADDDATE); dprintf("%s\n", bootfileMsg); } } /* * Crack a configuration file line */ static void cfline(const char *line, struct filed *f, const char *prog, const char *host) { struct addrinfo hints, *res; int error, i, pri, syncfile; const char *p, *q; char *bp; char buf[MAXLINE], ebuf[100]; dprintf("cfline(\"%s\", f, \"%s\", \"%s\")\n", line, prog, host); errno = 0; /* keep strerror() stuff out of logerror messages */ /* clear out file entry */ memset(f, 0, sizeof(*f)); for (i = 0; i <= LOG_NFACILITIES; i++) f->f_pmask[i] = INTERNAL_NOPRI; /* save hostname if any */ if (host && *host == '*') host = NULL; if (host) { int hl; f->f_host = strdup(host); if (f->f_host == NULL) { logerror("strdup"); exit(1); } hl = strlen(f->f_host); if (hl > 0 && f->f_host[hl-1] == '.') f->f_host[--hl] = '\0'; trimdomain(f->f_host, hl); } /* save program name if any */ if (prog && *prog == '*') prog = NULL; if (prog) { f->f_program = strdup(prog); if (f->f_program == NULL) { logerror("strdup"); exit(1); } } /* scan through the list of selectors */ for (p = line; *p && *p != '\t' && *p != ' ';) { int pri_done; int pri_cmp; int pri_invert; /* find the end of this facility name list */ for (q = p; *q && *q != '\t' && *q != ' ' && *q++ != '.'; ) continue; /* get the priority comparison */ pri_cmp = 0; pri_done = 0; pri_invert = 0; if (*q == '!') { pri_invert = 1; q++; } while (!pri_done) { switch (*q) { case '<': pri_cmp |= PRI_LT; q++; break; case '=': pri_cmp |= PRI_EQ; q++; break; case '>': pri_cmp |= PRI_GT; q++; break; default: pri_done++; break; } } /* collect priority name */ for (bp = buf; *q && !strchr("\t,; ", *q); ) *bp++ = *q++; *bp = '\0'; /* skip cruft */ while (strchr(",;", *q)) q++; /* decode priority name */ if (*buf == '*') { pri = LOG_PRIMASK; pri_cmp = PRI_LT | PRI_EQ | PRI_GT; } else { /* Ignore trailing spaces. */ for (i = strlen(buf) - 1; i >= 0 && buf[i] == ' '; i--) buf[i] = '\0'; pri = decode(buf, prioritynames); if (pri < 0) { errno = 0; (void)snprintf(ebuf, sizeof ebuf, "unknown priority name \"%s\"", buf); logerror(ebuf); return; } } if (!pri_cmp) pri_cmp = (UniquePriority) ? (PRI_EQ) : (PRI_EQ | PRI_GT) ; if (pri_invert) pri_cmp ^= PRI_LT | PRI_EQ | PRI_GT; /* scan facilities */ while (*p && !strchr("\t.; ", *p)) { for (bp = buf; *p && !strchr("\t,;. ", *p); ) *bp++ = *p++; *bp = '\0'; if (*buf == '*') { for (i = 0; i < LOG_NFACILITIES; i++) { f->f_pmask[i] = pri; f->f_pcmp[i] = pri_cmp; } } else { i = decode(buf, facilitynames); if (i < 0) { errno = 0; (void)snprintf(ebuf, sizeof ebuf, "unknown facility name \"%s\"", buf); logerror(ebuf); return; } f->f_pmask[i >> 3] = pri; f->f_pcmp[i >> 3] = pri_cmp; } while (*p == ',' || *p == ' ') p++; } p = q; } /* skip to action part */ while (*p == '\t' || *p == ' ') p++; if (*p == '-') { syncfile = 0; p++; } else syncfile = 1; switch (*p) { case '@': { char *tp; char endkey = ':'; /* * scan forward to see if there is a port defined. * so we can't use strlcpy.. 
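 *
 * Hypothetical examples of the forwarding targets this scanner
 * accepts:
 *
 *	@loghost			(port defaults to "syslog")
 *	@loghost:5514			(explicit port)
 *	@[2001:db8::1]:5514		(IPv6 literal, hence the ']' scan)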
*/ i = sizeof(f->f_un.f_forw.f_hname); tp = f->f_un.f_forw.f_hname; p++; /* * an ipv6 address should start with a '[' in that case * we should scan for a ']' */ if (*p == '[') { p++; endkey = ']'; } while (*p && (*p != endkey) && (i-- > 0)) { *tp++ = *p++; } if (endkey == ']' && *p == endkey) p++; *tp = '\0'; } /* See if we copied a domain and have a port */ if (*p == ':') p++; else p = NULL; memset(&hints, 0, sizeof(hints)); hints.ai_family = family; hints.ai_socktype = SOCK_DGRAM; error = getaddrinfo(f->f_un.f_forw.f_hname, p ? p : "syslog", &hints, &res); if (error) { logerror(gai_strerror(error)); break; } f->f_un.f_forw.f_addr = res; f->f_type = F_FORW; break; case '/': if ((f->f_file = open(p, logflags, 0600)) < 0) { f->f_type = F_UNUSED; logerror(p); break; } if (syncfile) f->f_flags |= FFLAG_SYNC; if (isatty(f->f_file)) { if (strcmp(p, ctty) == 0) f->f_type = F_CONSOLE; else f->f_type = F_TTY; (void)strlcpy(f->f_un.f_fname, p + sizeof(_PATH_DEV) - 1, sizeof(f->f_un.f_fname)); } else { (void)strlcpy(f->f_un.f_fname, p, sizeof(f->f_un.f_fname)); f->f_type = F_FILE; } break; case '|': f->f_un.f_pipe.f_pid = 0; (void)strlcpy(f->f_un.f_pipe.f_pname, p + 1, sizeof(f->f_un.f_pipe.f_pname)); f->f_type = F_PIPE; break; case '*': f->f_type = F_WALL; break; default: for (i = 0; i < MAXUNAMES && *p; i++) { for (q = p; *q && *q != ','; ) q++; (void)strncpy(f->f_un.f_uname[i], p, MAXLOGNAME - 1); if ((q - p) >= MAXLOGNAME) f->f_un.f_uname[i][MAXLOGNAME - 1] = '\0'; else f->f_un.f_uname[i][q - p] = '\0'; while (*q == ',' || *q == ' ') q++; p = q; } f->f_type = F_USERS; break; } } /* * Decode a symbolic name to a numeric value */ static int decode(const char *name, const CODE *codetab) { const CODE *c; char *p, buf[40]; if (isdigit(*name)) return (atoi(name)); for (p = buf; *name && p < &buf[sizeof(buf) - 1]; p++, name++) { if (isupper(*name)) *p = tolower(*name); else *p = *name; } *p = '\0'; for (c = codetab; c->c_name; c++) if (!strcmp(buf, c->c_name)) return (c->c_val); return (-1); } static void markit(void) { struct filed *f; dq_t q, next; now = time((time_t *)NULL); MarkSeq += TIMERINTVL; if (MarkSeq >= MarkInterval) { logmsg(LOG_INFO, "-- MARK --", LocalHostName, ADDDATE|MARK); MarkSeq = 0; } for (f = Files; f; f = f->f_next) { if (f->f_prevcount && now >= REPEATTIME(f)) { dprintf("flush %s: repeated %d times, %d sec.\n", TypeNames[f->f_type], f->f_prevcount, repeatinterval[f->f_repeatcount]); fprintlog(f, 0, (char *)NULL); BACKOFF(f); } } /* Walk the dead queue, and see if we should signal somebody. */ for (q = TAILQ_FIRST(&deadq_head); q != NULL; q = next) { next = TAILQ_NEXT(q, dq_entries); switch (q->dq_timeout) { case 0: /* Already signalled once, try harder now. */ if (kill(q->dq_pid, SIGKILL) != 0) (void)deadq_remove(q->dq_pid); break; case 1: /* * Timed out on dead queue, send terminate * signal. Note that we leave the removal * from the dead queue to reapchild(), which * will also log the event (unless the process * didn't even really exist, in which case we simply * drop it from the dead queue). */ if (kill(q->dq_pid, SIGTERM) != 0) (void)deadq_remove(q->dq_pid); /* FALLTHROUGH */ default: q->dq_timeout--; } } MarkSet = 0; (void)alarm(TIMERINTVL); } /* * fork off and become a daemon, but wait for the child to come online * before returning to the parent, or we get disk thrashing at boot etc. * Set a timer so we don't hang forever if it wedges.
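 *
 * The handshake is one-directional: after the child's first init()
 * pass the main loop sends the parent SIGALRM (the "kill(ppid,
 * SIGALRM)" in the select loop above), and the parent _exits 0 from
 * timedout().  A hypothetical sketch of the child's side:
 */
#if 0
	init(0);			/* first configuration pass */
	if (ppid != 1)
		kill(ppid, SIGALRM);	/* tell the waiting parent we are up */
#endif
	/* end of illustrative sketch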
*/ static int waitdaemon(int nochdir, int noclose, int maxwait) { int fd; int status; pid_t pid, childpid; switch (childpid = fork()) { case -1: return (-1); case 0: break; default: signal(SIGALRM, timedout); alarm(maxwait); while ((pid = wait3(&status, 0, NULL)) != -1) { if (WIFEXITED(status)) errx(1, "child pid %d exited with return code %d", pid, WEXITSTATUS(status)); if (WIFSIGNALED(status)) errx(1, "child pid %d exited on signal %d%s", pid, WTERMSIG(status), WCOREDUMP(status) ? " (core dumped)" : ""); if (pid == childpid) /* it's gone... */ break; } exit(0); } if (setsid() == -1) return (-1); if (!nochdir) (void)chdir("/"); if (!noclose && (fd = open(_PATH_DEVNULL, O_RDWR, 0)) != -1) { (void)dup2(fd, STDIN_FILENO); (void)dup2(fd, STDOUT_FILENO); (void)dup2(fd, STDERR_FILENO); if (fd > 2) (void)close (fd); } return (getppid()); } /* * We get a SIGALRM from the child when it's running and finished doing its * fsync()'s or O_SYNC writes for all the boot messages. * * We also get a signal from the kernel if the timer expires, so check to * see what happened. */ static void timedout(int sig __unused) { int left; left = alarm(0); signal(SIGALRM, SIG_DFL); if (left == 0) errx(1, "timed out waiting for child"); else _exit(0); } /* * Add `s' to the list of allowable peer addresses to accept messages * from. * * `s' is a string in the form: * * [*]domainname[:{servicename|portnumber|*}] * * or * * netaddr/maskbits[:{servicename|portnumber|*}] * * Returns -1 on error, 0 if the argument was valid. */ static int allowaddr(char *s) { char *cp1, *cp2; struct allowedpeer ap; struct servent *se; int masklen = -1; struct addrinfo hints, *res; struct in_addr *addrp, *maskp; #ifdef INET6 int i; u_int32_t *addr6p, *mask6p; #endif char ip[NI_MAXHOST]; #ifdef INET6 if (*s != '[' || (cp1 = strchr(s + 1, ']')) == NULL) #endif cp1 = s; if ((cp1 = strrchr(cp1, ':'))) { /* service/port provided */ *cp1++ = '\0'; if (strlen(cp1) == 1 && *cp1 == '*') /* any port allowed */ ap.port = 0; else if ((se = getservbyname(cp1, "udp"))) { ap.port = ntohs(se->s_port); } else { ap.port = strtol(cp1, &cp2, 0); if (*cp2 != '\0') return (-1); /* port not numeric */ } } else { if ((se = getservbyname("syslog", "udp"))) ap.port = ntohs(se->s_port); else /* sanity, should not happen */ ap.port = 514; } if ((cp1 = strchr(s, '/')) != NULL && strspn(cp1 + 1, "0123456789") == strlen(cp1 + 1)) { *cp1 = '\0'; if ((masklen = atoi(cp1 + 1)) < 0) return (-1); } #ifdef INET6 if (*s == '[') { cp2 = s + strlen(s) - 1; if (*cp2 == ']') { ++s; *cp2 = '\0'; } else { cp2 = NULL; } } else { cp2 = NULL; } #endif memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_DGRAM; hints.ai_flags = AI_PASSIVE | AI_NUMERICHOST; if (getaddrinfo(s, NULL, &hints, &res) == 0) { ap.isnumeric = 1; memcpy(&ap.a_addr, res->ai_addr, res->ai_addrlen); memset(&ap.a_mask, 0, sizeof(ap.a_mask)); ap.a_mask.ss_family = res->ai_family; if (res->ai_family == AF_INET) { ap.a_mask.ss_len = sizeof(struct sockaddr_in); maskp = &((struct sockaddr_in *)&ap.a_mask)->sin_addr; addrp = &((struct sockaddr_in *)&ap.a_addr)->sin_addr; if (masklen < 0) { /* use default netmask */ if (IN_CLASSA(ntohl(addrp->s_addr))) maskp->s_addr = htonl(IN_CLASSA_NET); else if (IN_CLASSB(ntohl(addrp->s_addr))) maskp->s_addr = htonl(IN_CLASSB_NET); else maskp->s_addr = htonl(IN_CLASSC_NET); } else if (masklen <= 32) { /* convert masklen to netmask */ if (masklen == 0) maskp->s_addr = 0; else maskp->s_addr = htonl(~((1 << (32 - masklen)) - 1)); } else { freeaddrinfo(res);
return (-1); } /* Lose any host bits in the network number. */ addrp->s_addr &= maskp->s_addr; } #ifdef INET6 else if (res->ai_family == AF_INET6 && masklen <= 128) { ap.a_mask.ss_len = sizeof(struct sockaddr_in6); if (masklen < 0) masklen = 128; mask6p = (u_int32_t *)&((struct sockaddr_in6 *)&ap.a_mask)->sin6_addr; /* convert masklen to netmask */ while (masklen > 0) { if (masklen < 32) { *mask6p = htonl(~(0xffffffff >> masklen)); break; } *mask6p++ = 0xffffffff; masklen -= 32; } /* Lose any host bits in the network number. */ mask6p = (u_int32_t *)&((struct sockaddr_in6 *)&ap.a_mask)->sin6_addr; addr6p = (u_int32_t *)&((struct sockaddr_in6 *)&ap.a_addr)->sin6_addr; for (i = 0; i < 4; i++) addr6p[i] &= mask6p[i]; } #endif else { freeaddrinfo(res); return (-1); } freeaddrinfo(res); } else { /* arg `s' is domain name */ ap.isnumeric = 0; ap.a_name = s; if (cp1) *cp1 = '/'; #ifdef INET6 if (cp2) { *cp2 = ']'; --s; } #endif } if (Debug) { printf("allowaddr: rule %d: ", NumAllowed); if (ap.isnumeric) { printf("numeric, "); getnameinfo((struct sockaddr *)&ap.a_addr, ((struct sockaddr *)&ap.a_addr)->sa_len, ip, sizeof ip, NULL, 0, NI_NUMERICHOST); printf("addr = %s, ", ip); getnameinfo((struct sockaddr *)&ap.a_mask, ((struct sockaddr *)&ap.a_mask)->sa_len, ip, sizeof ip, NULL, 0, NI_NUMERICHOST); printf("mask = %s; ", ip); } else { printf("domainname = %s; ", ap.a_name); } printf("port = %d\n", ap.port); } if ((AllowedPeers = realloc(AllowedPeers, ++NumAllowed * sizeof(struct allowedpeer))) == NULL) { logerror("realloc"); exit(1); } memcpy(&AllowedPeers[NumAllowed - 1], &ap, sizeof(struct allowedpeer)); return (0); } /* * Validate that the remote peer has permission to log to us. */ static int validate(struct sockaddr *sa, const char *hname) { int i; size_t l1, l2; char *cp, name[NI_MAXHOST], ip[NI_MAXHOST], port[NI_MAXSERV]; struct allowedpeer *ap; struct sockaddr_in *sin4, *a4p = NULL, *m4p = NULL; #ifdef INET6 int j, reject; struct sockaddr_in6 *sin6, *a6p = NULL, *m6p = NULL; #endif struct addrinfo hints, *res; u_short sport; if (NumAllowed == 0) /* traditional behaviour, allow everything */ return (1); (void)strlcpy(name, hname, sizeof(name)); memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_DGRAM; hints.ai_flags = AI_PASSIVE | AI_NUMERICHOST; if (getaddrinfo(name, NULL, &hints, &res) == 0) freeaddrinfo(res); else if (strchr(name, '.') == NULL) { strlcat(name, ".", sizeof name); strlcat(name, LocalDomain, sizeof name); } if (getnameinfo(sa, sa->sa_len, ip, sizeof ip, port, sizeof port, NI_NUMERICHOST | NI_NUMERICSERV) != 0) return (0); /* for safety, should not occur */ dprintf("validate: dgram from IP %s, port %s, name %s;\n", ip, port, name); sport = atoi(port); /* now, walk down the list */ for (i = 0, ap = AllowedPeers; i < NumAllowed; i++, ap++) { if (ap->port != 0 && ap->port != sport) { dprintf("rejected in rule %d due to port mismatch.\n", i); continue; } if (ap->isnumeric) { if (ap->a_addr.ss_family != sa->sa_family) { dprintf("rejected in rule %d due to address family mismatch.\n", i); continue; } if (ap->a_addr.ss_family == AF_INET) { sin4 = (struct sockaddr_in *)sa; a4p = (struct sockaddr_in *)&ap->a_addr; m4p = (struct sockaddr_in *)&ap->a_mask; if ((sin4->sin_addr.s_addr & m4p->sin_addr.s_addr) != a4p->sin_addr.s_addr) { dprintf("rejected in rule %d due to IP mismatch.\n", i); continue; } } #ifdef INET6 else if (ap->a_addr.ss_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)sa; a6p = (struct sockaddr_in6 *)&ap->a_addr; m6p = (struct 
sockaddr_in6 *)&ap->a_mask; if (a6p->sin6_scope_id != 0 && sin6->sin6_scope_id != a6p->sin6_scope_id) { dprintf("rejected in rule %d due to scope mismatch.\n", i); continue; } reject = 0; for (j = 0; j < 16; j += 4) { if ((*(u_int32_t *)&sin6->sin6_addr.s6_addr[j] & *(u_int32_t *)&m6p->sin6_addr.s6_addr[j]) != *(u_int32_t *)&a6p->sin6_addr.s6_addr[j]) { ++reject; break; } } if (reject) { dprintf("rejected in rule %d due to IP mismatch.\n", i); continue; } } #endif else continue; } else { cp = ap->a_name; l1 = strlen(name); if (*cp == '*') { /* allow wildmatch */ cp++; l2 = strlen(cp); if (l2 > l1 || memcmp(cp, &name[l1 - l2], l2) != 0) { dprintf("rejected in rule %d due to name mismatch.\n", i); continue; } } else { /* exact match */ l2 = strlen(cp); if (l2 != l1 || memcmp(cp, name, l1) != 0) { dprintf("rejected in rule %d due to name mismatch.\n", i); continue; } } } dprintf("accepted in rule %d.\n", i); return (1); /* hooray! */ } return (0); } /* * Fairly similar to popen(3), but returns an open descriptor, as * opposed to a FILE *. */ static int p_open(const char *prog, pid_t *rpid) { int pfd[2], nulldesc; pid_t pid; sigset_t omask, mask; char *argv[4]; /* sh -c cmd NULL */ char errmsg[200]; if (pipe(pfd) == -1) return (-1); if ((nulldesc = open(_PATH_DEVNULL, O_RDWR)) == -1) /* we are royally screwed anyway */ return (-1); sigemptyset(&mask); sigaddset(&mask, SIGALRM); sigaddset(&mask, SIGHUP); sigprocmask(SIG_BLOCK, &mask, &omask); switch ((pid = fork())) { case -1: sigprocmask(SIG_SETMASK, &omask, 0); close(nulldesc); return (-1); case 0: argv[0] = strdup("sh"); argv[1] = strdup("-c"); argv[2] = strdup(prog); argv[3] = NULL; if (argv[0] == NULL || argv[1] == NULL || argv[2] == NULL) { logerror("strdup"); exit(1); } alarm(0); (void)setsid(); /* Avoid catching SIGHUPs. */ /* * Throw away pending signals, and reset signal * behaviour to standard values. */ signal(SIGALRM, SIG_IGN); signal(SIGHUP, SIG_IGN); sigprocmask(SIG_SETMASK, &omask, 0); signal(SIGPIPE, SIG_DFL); signal(SIGQUIT, SIG_DFL); signal(SIGALRM, SIG_DFL); signal(SIGHUP, SIG_DFL); dup2(pfd[0], STDIN_FILENO); dup2(nulldesc, STDOUT_FILENO); dup2(nulldesc, STDERR_FILENO); closefrom(3); (void)execvp(_PATH_BSHELL, argv); _exit(255); } sigprocmask(SIG_SETMASK, &omask, 0); close(nulldesc); close(pfd[0]); /* * Avoid blocking on a hung pipe. With O_NONBLOCK, we are * supposed to get an EWOULDBLOCK on writev(2), which is * caught by the logic above anyway, which will in turn close * the pipe, and fork a new logging subprocess if necessary. * The stale subprocess will be killed some time later unless * it terminated itself due to closing its input pipe (so we * get rid of really dead puppies). */ if (fcntl(pfd[1], F_SETFL, O_NONBLOCK) == -1) { /* This is bad. */ (void)snprintf(errmsg, sizeof errmsg, "Warning: cannot change pipe to PID %d to " "non-blocking behaviour.", (int)pid); logerror(errmsg); } *rpid = pid; return (pfd[1]); } static void deadq_enter(pid_t pid, const char *name) { dq_t p; int status; /* * Be paranoid, if we can't signal the process, don't enter it * into the dead queue (perhaps it's already dead). If possible, * we try to fetch and log the child's status. 
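 *
 * kill(2) with signal number 0 performs only the validity checks,
 * which makes it a cheap "does this pid still exist" probe.  A
 * hypothetical sketch:
 */
#if 0
	if (kill(pid, 0) == 0)
		;	/* pid exists and we may signal it */
	else if (errno == ESRCH)
		;	/* no such process: reap instead of queueing */
#endif
	/* end of illustrative sketch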
*/ if (kill(pid, 0) != 0) { if (waitpid(pid, &status, WNOHANG) > 0) log_deadchild(pid, status, name); return; } p = malloc(sizeof(struct deadq_entry)); if (p == NULL) { logerror("malloc"); exit(1); } p->dq_pid = pid; p->dq_timeout = DQ_TIMO_INIT; TAILQ_INSERT_TAIL(&deadq_head, p, dq_entries); } static int deadq_remove(pid_t pid) { dq_t q; TAILQ_FOREACH(q, &deadq_head, dq_entries) { if (q->dq_pid == pid) { TAILQ_REMOVE(&deadq_head, q, dq_entries); free(q); return (1); } } return (0); } static void log_deadchild(pid_t pid, int status, const char *name) { int code; char buf[256]; const char *reason; errno = 0; /* Keep strerror() stuff out of logerror messages. */ if (WIFSIGNALED(status)) { reason = "due to signal"; code = WTERMSIG(status); } else { reason = "with status"; code = WEXITSTATUS(status); if (code == 0) return; } (void)snprintf(buf, sizeof buf, "Logging subprocess %d (%s) exited %s %d.", pid, name, reason, code); logerror(buf); } static int * socksetup(int af, char *bindhostname) { struct addrinfo hints, *res, *r; const char *bindservice; char *cp; int error, maxs, *s, *socks; /* * We have to handle this case for backwards compatibility: * If there are two (or more) colons but no '[' and ']', * assume this is an inet6 address without a service. */ bindservice = "syslog"; if (bindhostname != NULL) { #ifdef INET6 if (*bindhostname == '[' && (cp = strchr(bindhostname + 1, ']')) != NULL) { ++bindhostname; *cp = '\0'; if (cp[1] == ':' && cp[2] != '\0') bindservice = cp + 2; } else { #endif cp = strchr(bindhostname, ':'); if (cp != NULL && strchr(cp + 1, ':') == NULL) { *cp = '\0'; if (cp[1] != '\0') bindservice = cp + 1; if (cp == bindhostname) bindhostname = NULL; } #ifdef INET6 } #endif } memset(&hints, 0, sizeof(hints)); hints.ai_flags = AI_PASSIVE; hints.ai_family = af; hints.ai_socktype = SOCK_DGRAM; error = getaddrinfo(bindhostname, bindservice, &hints, &res); if (error) { logerror(gai_strerror(error)); errno = 0; die(0); } /* Count max number of sockets we may open */ for (maxs = 0, r = res; r; r = r->ai_next, maxs++); socks = malloc((maxs+1) * sizeof(int)); if (socks == NULL) { logerror("couldn't allocate memory for sockets"); die(0); } *socks = 0; /* num of sockets counter at start of array */ s = socks + 1; for (r = res; r; r = r->ai_next) { int on = 1; *s = socket(r->ai_family, r->ai_socktype, r->ai_protocol); if (*s < 0) { logerror("socket"); continue; } #ifdef INET6 if (r->ai_family == AF_INET6) { if (setsockopt(*s, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&on, sizeof (on)) < 0) { logerror("setsockopt"); close(*s); continue; } } #endif if (setsockopt(*s, SOL_SOCKET, SO_REUSEADDR, (char *)&on, sizeof (on)) < 0) { logerror("setsockopt"); close(*s); continue; } /* * RFC 3164 recommends that client side messages * should come from the privileged syslogd port. * * If the system administrator chooses not to obey * this, we can skip the bind() step so that the * system will choose a port for us.
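 *
 * In other words, with -N (NoBind) the socket is left unbound and
 * the kernel assigns an ephemeral source port on first use; without
 * it the socket is bound to the getaddrinfo(3) result, normally
 * port 514.  A hypothetical sketch of the unbound case (s and r are
 * stand-ins):
 */
#if 0
	/* No bind(2): the kernel picks a source port at the first send. */
	sendto(s, line, strlen(line), 0, r->ai_addr, r->ai_addrlen);
#endif
	/* end of illustrative sketch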
*/ if (!NoBind) { if (bind(*s, r->ai_addr, r->ai_addrlen) < 0) { logerror("bind"); close(*s); continue; } if (!SecureMode) increase_rcvbuf(*s); } (*socks)++; dprintf("socksetup: new socket fd is %d\n", *s); s++; } if (*socks == 0) { free(socks); if (Debug) return (NULL); else die(0); } if (res) freeaddrinfo(res); return (socks); } static void increase_rcvbuf(int fd) { socklen_t len, slen; slen = sizeof(len); if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &len, &slen) == 0) { if (len < RCVBUF_MINSIZE) { len = RCVBUF_MINSIZE; setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &len, sizeof(len)); } } } Index: projects/clang391-import =================================================================== --- projects/clang391-import (revision 309262) +++ projects/clang391-import (revision 309263) Property changes on: projects/clang391-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r309213-309262